From 3ef5894d8910f9273f1ee7aaa744aa55928d38c0 Mon Sep 17 00:00:00 2001 From: Zofia Zientek Date: Tue, 17 Oct 2023 17:41:11 +0200 Subject: [PATCH] Dodanie kodu mechanizmu wyliczania ceny --- Mieszkania.ipynb | 212 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 Mieszkania.ipynb diff --git a/Mieszkania.ipynb b/Mieszkania.ipynb new file mode 100644 index 0000000..6a15016 --- /dev/null +++ b/Mieszkania.ipynb @@ -0,0 +1,212 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Ładowanie danych:" + ], + "metadata": { + "id": "coWdAJZAPC1C" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bozs99nnO2jv", + "outputId": "4119ebc8-eccf-4574-866c-2502176e0fbd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "fatal: destination path 'mieszkania5' already exists and is not an empty directory.\n" + ] + } + ], + "source": [ + "!git clone git://gonito.net/mieszkania5" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Importy:" + ], + "metadata": { + "id": "OFaZTYDGQqLQ" + } + }, + { + "cell_type": "code", + "source": [ + "import csv\n", + "import pandas as pd\n", + "import numpy as np\n", + "\n", + "data = pd.read_table(\"mieszkania5/train/train.tsv\", delimiter='\\t', header=None)\n", + "data.rename(columns={0: 'cena', 1: 'stan', 2: 'czynsz', 3: 'x3', 4: 'cenazam', 5: 'link', 6: 'pietro', 7: 'x7', 8: 'metraz', 9: 'rynek', 10: 'liczba pokoi', 11: 'budynek', 12: 'x12', 13: 'x13', 14: 'x14', 15: 'x15', 16: 'x16', 17: 'x17', 18: 'x18', 19: 'x19', 20: 'x20', 21: 'x21', 22: 'x22', 23: 'x23', 24: 'x24', 25: 'x25'}, inplace=True)\n", + "\n", + "data.drop('x3', inplace=True, axis=1)\n", + "data.drop('cenazam', inplace=True, axis=1)\n", + "data.drop('link', inplace=True, axis=1)\n", + "data.drop('pietro', inplace=True, axis=1)\n", + "data.drop('budynek', inplace=True, axis=1)\n", + "data.drop('x7', inplace=True, axis=1)\n", + "data.drop('x12', inplace=True, axis=1)\n", + "data.drop('x13', inplace=True, axis=1)\n", + "data.drop('x14', inplace=True, axis=1)\n", + "data.drop('x15', inplace=True, axis=1)\n", + "data.drop('x16', inplace=True, axis=1)\n", + "data.drop('x17', inplace=True, axis=1)\n", + "data.drop('x18', inplace=True, axis=1)\n", + "data.drop('x19', inplace=True, axis=1)\n", + "data.drop('x20', inplace=True, axis=1)\n", + "data.drop('x21', inplace=True, axis=1)\n", + "data.drop('x22', inplace=True, axis=1)\n", + "data.drop('x23', inplace=True, axis=1)\n", + "data.drop('x24', inplace=True, axis=1)\n", + "data.drop('x25', inplace=True, axis=1)\n", + "\n", + "data['czynsz'] = data['czynsz'].str.extract('(\\d+)')\n", + "data['stan'] = data['stan'].map({'do zamieszkania': 2, 'do remontu': 1, 'do wykończenia': 2})\n", + "data['rynek'] = data['rynek'].map({'wtórny': 0, 'pierwotny': 1})\n", + "\n", + "data.dropna(inplace=True)" + ], + "metadata": { + "id": "K-TUB0pAPCp2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "57zFDlw7PDDb" + } + }, + { + "cell_type": "code", + "source": [ + "cena = data['cena']\n", + "parametry = data[['stan', 'czynsz', 'liczba pokoi', 'metraz', 'rynek']]" + ], + "metadata": { + "id": "___F5VBeco6H" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import LinearRegression" + ], + "metadata": { + "id": "H1shMEsxTccr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def train_model(cena, parametry):\n", + " model = LinearRegression()\n", + " model.fit(X=parametry, y=cena)\n", + " return model" + ], + "metadata": { + "id": "vT9sCZ2XTjKy" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "model = train_model(cena, parametry)" + ], + "metadata": { + "id": "-DZ-HNMtUBmr" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def predict(stan, czynsz, liczba_pokoi, metraz, rynek):\n", + " return model.predict(np.array([[stan, czynsz, liczba_pokoi, metraz, rynek]])).item()" + ], + "metadata": { + "id": "oK_ZW9N9Wg2u" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "predict(1, 200, 2, 40.0, 0)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bLmRBRBMgFTg", + "outputId": "f94f3691-9a2a-4035-b3ad-dde097631e85" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "217119.72285625804" + ] + }, + "metadata": {}, + "execution_count": 60 + } + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "K7eEdZFzgI3n" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file