diff --git a/cw/07_regresja_liniowa.ipynb b/cw/07_regresja_liniowa.ipynb new file mode 100644 index 0000000..bf86c4b --- /dev/null +++ b/cw/07_regresja_liniowa.ipynb @@ -0,0 +1,1086 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n", + "
\n", + "

Ekstrakcja informacji

\n", + "

7. Regresja liniowa [ćwiczenia]

\n", + "

Jakub Pokrywka (2021)

\n", + "
\n", + "\n", + "![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regresja liniowa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## import bibliotek" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from pathlib import Path\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn.linear_model import LinearRegression\n", + "plt.rcParams['figure.figsize'] = [10, 5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zbiór \n", + "\n", + "https://git.wmi.amu.edu.pl/kubapok/mieszkania2-below1m-public" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ładowanie zbioru train" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_DIR = Path('/home/kuba/Syncthing/przedmioty/2020-02/ISI/zajecia7_regresja_liniowa/mieszkania2')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "with open(DATA_DIR / 'names') as f_names:\n", + " names = f_names.read().rstrip('\\n').split('\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mieszkania_train = pd.read_csv(DATA_DIR/'train/in.tsv', sep ='\\t', names=names)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
isNewroomsfloorlocationsqrMetres
0False31Centrum78
1False32Sołacz62
2False30Sołacz15
3False40Sołacz14
4False30Sołacz15
\n", + "
" + ], + "text/plain": [ + " isNew rooms floor location sqrMetres\n", + "0 False 3 1 Centrum 78\n", + "1 False 3 2 Sołacz 62\n", + "2 False 3 0 Sołacz 15\n", + "3 False 4 0 Sołacz 14\n", + "4 False 3 0 Sołacz 15" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mieszkania_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "with open(DATA_DIR/'train'/'expected.tsv','r') as train_exp_f:\n", + " Y_train = np.array([float(x.rstrip('\\n')) for x in train_exp_f.readlines()])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([476118., 459531., 411557., ..., 320000., 364000., 209000.])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Y_train" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "mieszkania_train['price'] = Y_train" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "X_train = mieszkania_train['sqrMetres'].to_numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Wizualizacja danych" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
isNewroomsfloorlocationsqrMetresprice
0False31Centrum78476118.0
1False32Sołacz62459531.0
2False30Sołacz15411557.0
3False40Sołacz14496416.0
4False30Sołacz15406032.0
.....................
1652True20Grunwald51299000.0
1653True22Centrum53339000.0
1654True34Stare65320000.0
1655True31Nowe67364000.0
1656True33Grunwald50209000.0
\n", + "

1657 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " isNew rooms floor location sqrMetres price\n", + "0 False 3 1 Centrum 78 476118.0\n", + "1 False 3 2 Sołacz 62 459531.0\n", + "2 False 3 0 Sołacz 15 411557.0\n", + "3 False 4 0 Sołacz 14 496416.0\n", + "4 False 3 0 Sołacz 15 406032.0\n", + "... ... ... ... ... ... ...\n", + "1652 True 2 0 Grunwald 51 299000.0\n", + "1653 True 2 2 Centrum 53 339000.0\n", + "1654 True 3 4 Stare 65 320000.0\n", + "1655 True 3 1 Nowe 67 364000.0\n", + "1656 True 3 3 Grunwald 50 209000.0\n", + "\n", + "[1657 rows x 6 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mieszkania_train" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x='sqrMetres',y='price', data = mieszkania_train, linewidth = 0, s = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pytanie- Jaki jest baseline naszego systemu?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Czym jest regresja liniowa?- przypadek jednowymiarowym\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![regresja liniowa 1](obrazki/1.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![regresja liniowa 2](obrazki/2.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![regresja liniowa 3](obrazki/3.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![regresja liniowa 4](obrazki/4.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## wzór na regresję w przypadku jednowymiarowym?\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$Y = a*X_1 + b$\n", + "\n", + "$Y = w_1 * X_1 + w_0$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie - napisać funkcję predict_score(sqr_metres) która zwraca cenę mieszkania zgodnie z modelem regresji liniowej ( 5 minut) \n", + "\n", + "Należy samemu wymyślić współczynniki modelu" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def predict_price(sqr_metres):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "predict_price(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "predict_price(40)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + 
"predict_price(55)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "predict_price(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "Y_train_predicted = predict_price(X_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mierzenie błędu" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![RMSE 2](obrazki/6.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![RMSE 2](obrazki/5.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie - napisać funkcję, która liczy błąd średniowadratowy na całym zbiorze (7 minut)\n", + "\n", + "rmse(Y_true, Y_predicted)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "def rmse(Y_true, Y_predicted):\n", + " pass " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "rmse(np.array([300_000, 250_000]), np.array([300_000, 250_000]))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "rmse(np.array([305_000, 250_000]) ,np.array([300_000, 350_000]) )" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "rmse(np.array([300_000, 250_000]), np.array([330_000, 360_000]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie - za pomocą rmse policzyć błąd dla baseline (3 minuty)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie - za pomocą rmse policzyc błąd dla predykcji (2 minuty)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Na jakim zbiorze najlepiej sprawdzać wyniki?\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"![a](obrazki/7.png)\n", + "\n", + "![a](obrazki/8.png)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "mieszkania_dev = pd.read_csv(DATA_DIR/'dev-0'/'in.tsv', sep = '\\t', names = names)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "with open(DATA_DIR/'dev-0'/'expected.tsv','r') as dev_exp_f:\n", + " Y_dev = np.array([float(x.rstrip('\\n')) for x in dev_exp_f.readlines()])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "mieszkania_dev['price'] = Y_dev" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "X_dev = mieszkania_dev['sqrMetres'].to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x='sqrMetres',y='price', data = mieszkania_dev, linewidth = 0, s = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Zadanie - policzyć rmse dla predykcji ze zbioru deweloperskiego modelu baseline i naszego modelu regresji liniowej" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Przypadek wielowymiarowy" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x='floor',y='price', data = mieszkania_train, linewidth = 0, s = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$Y = w_1 * X_1 + w_2 * X_1 + w_3 * X_3 + w_0$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie. Napisać analogiczną funkcję predict_price(sqr_metres, floor), policzyć rmse dla takiego modelu ( 7 minut)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## jak dobrać najlepsze parametry?" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.lmplot(x='sqrMetres',y='price', data = mieszkania_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "lm_model = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.fit(mieszkania_train[['isNew','rooms', 'floor', 'sqrMetres']], Y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "Y_train_predicted = lm_model.predict(mieszkania_train[['isNew','rooms', 'floor', 'sqrMetres']])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "rmse(Y_train, Y_train_predicted)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "Y_dev_predicted = lm_model.predict(mieszkania_dev[['isNew','rooms', 'floor', 'sqrMetres']])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "rmse(Y_dev, Y_dev_predicted)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([469449.27836213])" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.predict(np.array(([[0, 4, 3, 70]])))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "array([455982.54297977])" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.predict(np.array(([[0, 4, 3, 60]])))" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 4522.65059749, 73763.4125433 , -78.83243119, 1346.67353824])" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "80364.97780599026" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.intercept_" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "455982.5429800203" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "0 * 4522.65059749 + 4* 73763.4125433 + 3 * (-78.83243119) + 60 * 1346.67353824 + 80364.97780599032" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "with open(DATA_DIR/'dev-0'/'out.tsv','w') as f_out_file:\n", + " for line in Y_dev_predicted:\n", + " f_out_file.write(str(line))\n", + " f_out_file.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Uwaga - regresja linowa działa dobrze tylko dla danych, gdzie występuje korelacja liniowa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![RMSE 3](obrazki/9.png)\n", + "\n", + "![RMSE 4](obrazki/10.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie domowe\n", + "\n", + "\n", + "- https://gonito.net/challenge/retroc2\n", + "- termin 17.05\n", + "- należy użyć wektoryzacji 
(np. TF-IDF)\n", + "- wynik zaliczający to max 50 RMSE dla dev-0 \n", + "- punkty: 60, dla 3 najlepszych wyników na test-A: 80,\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "author": "Jakub Pokrywka", + "email": "kubapok@wmi.amu.edu.pl", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "lang": "pl", + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "subtitle": "7.Regresja liniowa[ćwiczenia]", + "title": "Ekstrakcja informacji", + "year": "2021" + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/cw/07_regresja_liniowa_ODPOWIEDZI.ipynb b/cw/07_regresja_liniowa_ODPOWIEDZI.ipynb new file mode 100644 index 0000000..13217b2 --- /dev/null +++ b/cw/07_regresja_liniowa_ODPOWIEDZI.ipynb @@ -0,0 +1,1416 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n", + "
\n", + "

Ekstrakcja informacji

\n", + "

7. Regresja liniowa [ćwiczenia]

\n", + "

Jakub Pokrywka (2021)

\n", + "
\n", + "\n", + "![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Regresja liniowa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## import bibliotek" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from pathlib import Path\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn.linear_model import LinearRegression\n", + "plt.rcParams['figure.figsize'] = [10, 5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zbiór \n", + "\n", + "https://git.wmi.amu.edu.pl/kubapok/mieszkania2-below1m-public" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ładowanie zbioru train" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "DATA_DIR = Path('/home/kuba/Syncthing/przedmioty/2020-02/ISI/zajecia7_regresja_liniowa/mieszkania2')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "with open(DATA_DIR / 'names') as f_names:\n", + " names = f_names.read().rstrip('\\n').split('\\t')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "mieszkania_train = pd.read_csv(DATA_DIR/'train/in.tsv', sep ='\\t', names=names)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
isNewroomsfloorlocationsqrMetres
0False31Centrum78
1False32Sołacz62
2False30Sołacz15
3False40Sołacz14
4False30Sołacz15
\n", + "
" + ], + "text/plain": [ + " isNew rooms floor location sqrMetres\n", + "0 False 3 1 Centrum 78\n", + "1 False 3 2 Sołacz 62\n", + "2 False 3 0 Sołacz 15\n", + "3 False 4 0 Sołacz 14\n", + "4 False 3 0 Sołacz 15" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mieszkania_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "with open(DATA_DIR/'train'/'expected.tsv','r') as train_exp_f:\n", + " Y_train = np.array([float(x.rstrip('\\n')) for x in train_exp_f.readlines()])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([476118., 459531., 411557., ..., 320000., 364000., 209000.])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Y_train" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "mieszkania_train['price'] = Y_train" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "X_train = mieszkania_train['sqrMetres'].to_numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Wizualizacja danych" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
isNewroomsfloorlocationsqrMetresprice
0False31Centrum78476118.0
1False32Sołacz62459531.0
2False30Sołacz15411557.0
3False40Sołacz14496416.0
4False30Sołacz15406032.0
.....................
1652True20Grunwald51299000.0
1653True22Centrum53339000.0
1654True34Stare65320000.0
1655True31Nowe67364000.0
1656True33Grunwald50209000.0
\n", + "

1657 rows × 6 columns

\n", + "
" + ], + "text/plain": [ + " isNew rooms floor location sqrMetres price\n", + "0 False 3 1 Centrum 78 476118.0\n", + "1 False 3 2 Sołacz 62 459531.0\n", + "2 False 3 0 Sołacz 15 411557.0\n", + "3 False 4 0 Sołacz 14 496416.0\n", + "4 False 3 0 Sołacz 15 406032.0\n", + "... ... ... ... ... ... ...\n", + "1652 True 2 0 Grunwald 51 299000.0\n", + "1653 True 2 2 Centrum 53 339000.0\n", + "1654 True 3 4 Stare 65 320000.0\n", + "1655 True 3 1 Nowe 67 364000.0\n", + "1656 True 3 3 Grunwald 50 209000.0\n", + "\n", + "[1657 rows x 6 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mieszkania_train" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAmEAAAFICAYAAAAYvikoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAABISklEQVR4nO3dfXwV1b0v/s834UEeAkGDQQlCgIBEVJTUSilWW3w6cMQe5dxaK9aHek89pxe19R49cKmHC6ft9fyq9vyK/rxUK31SsRUsaK148EApWkNFnlJIJFDCQwAhECBIJOv3x8zerD3JTNaezOyZvffn/XrxGpK99+y110wy36z1ne8SpRSIiIiIKLMKom4AERERUT5iEEZEREQUAQZhRERERBFgEEZEREQUAQZhRERERBFgEEZEREQUgawMwkTkORHZLyKbDJ//9yKyRUQ2i8gvw24fERERUWckG+uEichVAI4BWKSUGtvJcysAvAzgi0qpwyJyrlJqfybaSUREROQmK0fClFKrABzSvyciI0TkdyKyTkRWi8iF9kPfAPBjpdRh+7UMwIiIiChyWRmEuXgWwLeUUuMBfAfAAvv7owCMEpE1IvKuiNwQWQuJiIiIbN2ibkAQRKQvgM8BWCwiiW/3tLfdAFQAuBpAGYBVInKxUqopw80kIiIiSsqJIAzWiF6TUmpcB481AHhPKdUKoF5EtsEKyt7PYPuIiIiIUuTEdKRS6iisAGs6AIjlUvvhJbBGwSAiJbCmJ7dH0EwiIiKipKwMwkTkVwDWAhgtIg0icg+A2wHcIyIfAtgMYJr99DcBfCwiWwCsBPCwUurjKNpNRERElJCVJSqIiIiIsl1WjoQRERERZTsGYUREREQRyLq7I0tKStSwYcOibgYRERFRp9atW3dQKTWwo8eyLggbNmwYqquro24GERERUadEZKfbY5yOJCIiIooAgzAiIiKiCDAIIyIiIooAgzAiIiKiCDAIIyIiIooAgzAiIiKiCIQWhInIcyKyX0Q2uTwuIvIjEakTkQ0icnlYbSEiIiKKmzB
Hwn4K4AaPx28EUGH/uw/A0yG2hYiIiChWQgvClFKrABzyeMo0AIuU5V0AxSJyXljtISIiIoqTKHPCBgPYpX3dYH+PiIhs9QePY87STag/eDzqphBRwLIiMV9E7hORahGpPnDgQNTNISLKmOfX1GPR2p14fk191E0hooBFGYTtBjBE+7rM/l47SqlnlVJVSqmqgQM7XAMzK/EvXKLctrr2AK5/YhVW1/r/4/GuieWYMWEo7ppYHmDLiCgOogzCXgMww75L8koAR5RSeyNsT8bxL1yi3DZvWQ22NjZj3rIa3/soL+mDudPGorykT4AtI6I46BbWjkXkVwCuBlAiIg0AvgugOwAopZ4B8DqAvwFQB+AEgLvCaktcJf6y5V+4RLlp9tQxmLesBrOnjom6KUQUQ6KUiroNaamqqlLV1dVRN4OIiIioUyKyTilV1dFjWZGYT0REwWNeKlG0GIQREeUp5qUSRSu0nDAiIoo35qUSRYtBGBFRnkrceUlE0eB0JBEREVEEGIQRERERRYBBGBEREVEEGIQRERERRYBBGBFRjDjXmwxi/UkiiicGYUREMeJcbzKI9SeJKJ4YhBERxcjsqWMwurQoud6k82siyh1cO5KIiIgoJFw7koiIiChmGIQRERERRYBBGBEREVEEGIQREeWR+oPHMWfpJtQfPB51U4jyHoMwIqI88vyaeixauxPPr6mPuilEea9b1A0gIqLMuWtiecqWiKLDIIyIKI+Ul/TB3Gljo24GEYHTkURERESRYBBGRLHGRHIiylUMwogo1phITkS5ikEYEcXaXRPLMWPCUCaSI/6jgplsX9z7gsgEgzAiirVEInl5SZ+omxK5uI8KZrJ9ce8LIhO8O5KIKEvEvbxEJtsX974gMiFKqajbkJaqqipVXV0ddTOIKEPqDx7H82vqcdfEco6GEVHWEZF1Sqmqjh7jdCQRxRqnnYgoV3E6kohijdNORJSrGIQRUayxwjsR5SpORxIRERFFgEEYERERUQQYhBERERFFgEEYERERUQQYhBERERFFgEEYEaUt39btC+Pz5lsfElF7DMKIKMk0MMi3AqphfN5860Miao91wogoKREYAPCszZVvBVTD+Lz51odE1B7XjiSiJK7TSEQULK+1IzkSRkRJrE5PRJQ5zAkjIiIiigCDMCIiIqIIMAgjIkoTy0sQURAYhBERpYnlJYgoCEzMJyJKE8tLEFEQOBJGRJSmxF2kLONBpjiFHT9xOCYMwoiIiELGKez4icMxCXU6UkRuAPAUgEIAC5VS33c8fgGAFwAU2895RCn1ephtIiIiyjROYcdPHI5JaBXzRaQQwDYA1wJoAPA+gNuUUlu05zwL4AOl1NMiUgngdaXUMK/9smI+ERERZQuvivlhTkdeAaBOKbVdKXUKwIsApjmeowD0s//fH8CeENtDREREFBthBmGDAezSvm6wv6d7DMDXRKQBwOsAvhVie6iL4pDESJnD431GtvVFtrWXKF9FnZh/G4CfKqXKAPwNgJ+JSLs2ich9IlItItUHDhzIeCPJEockRsocHu8zsq0vsq29RPkqzMT83QCGaF+X2d/T3QPgBgBQSq0VkbMAlADYrz9JKfUsgGcBKycsrAaTtzgkMVLm8HifkW19kW3tJcpXYSbmd4OVmP8lWMHX+wC+qpTarD3nDQAvKaV+KiJjALwNYLDyaBQT84mIiChbRJKYr5T6FMA/AXgTQA2Al5VSm0VkrojcZD/t2wC+ISIfAvgVgK97BWBEREREuSLUOmF2za/XHd+bo/1/C4CJYbaBiIiIKI6iTszPGbwbiSj78Oc2XOxfIm8MwgLCu5GIsg9/bsPF/iXyFup0ZD7h3UgUZ6trD2DeshrMnjoGkyoGRt2c2ODPbbjYv9GrP3gcz6+px10Ty7ngfAyFdndkWHh3JFH6rn9iFbY2NmN0aRHefPCqqJtDRBkyZ+kmLFq7EzMmDMXcaWOjbk5e8ro7kiNhRHlg9tQxyZE
wIsofHI2MN46EEREREYUkqgW8iYhyAu/yI6IwMAij2OMFkKLGu/yIKAwMwij2eAGkqN01sRwzJgzNibwa/lETDfY7dYRBGMVeHC+A/IUab0Efn/KSPpg7bWxO3OLPP2qiwX6njvDuSIq9xAUwThK/UAHErm3E4+OFd8tFg/1OHWEQRuQDf6HGG4+Puzj+UZMP2O/UEZaoICIiIgoJS1QQERERxQyDsAgxuZuIiCh/MQiLEO+WISIiyl9MzI8Qk4eJskP9weN4fk097ppYnhNlKogoHjgSFqFcqj1ElMuCGLVm+gEROTEII6KcE3TAE0TB4LDTD1bXHsD1T6zC6toDoeyfiILHIIyIck7QAY9z1NpPkBf2yg/zltVga2Mz5i2rCWX/RBQ8BmGUMzgSQAlhBzx+gryw0w9mTx2D0aVFmD11TCj7J6LgsVgr5Yzrn1iFrY3NGF1ahDcfvCrq5lAOY6I+EZlisVbKC9k8EhBl0jYTxtOnj2qx/4jILwZhlDMmVQzEmw9ehUkVA6NuStqirBnn9t4MLsyw3h8R+cUgLEJBXOR4ocwNzhymTB5Xt/wpBhdmws4/yyT+PiHKLBZrjVDiIgcAc6eNjWwfFL3E9FZCJo+r870TWEzYjFv/ZSP+PiHKLAZhEQriIscLZW6Kw3HNpeCCzMThvCPKJ7w7koiIiCgkvDuSiIiIKGYYhBERERFFgEEYEWUN3r1HRLmEQRgRZQ2WzSCiXMK7I4koa/DuPSLKJRwJI6KsEfYi2LmC07bZhccrfzEII4qh1bUHcP0Tq7C69kDUTaEsxGnb7MLjlb8YhBFp9OAnyr9O5y2rwdbGZsxbVpPx9yZLHEcnTNuUS0sp5QMer/zFIIxIowc/Uf51OnvqGIwuLcLsqWMy/t5xE1UwFMfRCdM2BT1tG8eANJdwmj1/MTGfSDN76hjMW1aD2VPHoGxAbwDRJIGXDeiNzw4/O9mGfBbVeoZxvAkgqjZxTUmicHDZIsoL9QeP4/k19bhrYnlW/LU5Z+kmLFq7EzMmDM3qi14Q/Z5txy4X8RgQ+cdliyjvxXFqyUtXc0SCmD4KYh9++11/b07VRI/HgCgcnI6kvBDHqSUviYueX0FMHwWxD7/9zukvIsoHDMIoL3Q1qMk2QQSdQezDb79nW9BMROQHc8KIiIiIQsKcMCIiIqKYYRBGFENR1WViPajMcetr5/fjUkCYiILHIIwohqK6mzPb7iLNZm597fx+XAoIE1HwQk3MF5EbADwFoBDAQqXU9zt4zt8DeAyAAvChUuqrYbaJKBtElZieyffN99pTbn3t/H5cCggTUfBCS8wXkUIA2wBcC6ABwPsAblNKbdGeUwHgZQBfVEodFpFzlVL7vfbLxHyi3JArBWmJiLxElZh/BYA6pdR2pdQpAC8CmOZ4zjcA/FgpdRgAOgvAKHxR5pww3yW/cNFiIsp3YQZhgwHs0r5usL+nGwVglIisEZF37elLilCUOSfMdzkjDgFp2G0wrcJu2g79eXHov3RkW3uJKBhRF2vtBqACwNUAygCsEpGLlVJN+pNE5D4A9wHABRdckOEm5pcoi2SyQOcZT63YhiXr9+BoSytmTh4VSe5UXKrWm7ZDfx6AWLTdNO8tLn1NRJkVZhC2G8AQ7esy+3u6BgDvKaVaAdSLyDZYQdn7+pOUUs8CeBawcsJCazFFWlk+36ram4rqAh2XoNi0HR09L+q2mx67uPQ1EWVWmEHY+wAqRKQcVvD1FQDOOx+XALgNwPMiUgJrenJ7iG0iygozJ49Cv17dIw0osi0odrY3yLb7vZPTNLjKtr7OFfl+hy5FL7ScMKXUpwD+CcCbAGoAvKyU2iwic0XkJvtpbwL4WES2AFgJ4GGl1MdhtYmix9yXjjn7Rc+XMs2dylVxyBX024agjx1/foIVh3OL8pvxSJiIDAVQoZRaISK9AHRTSjV7vUYp9TqA1x3fm6P9XwF4yP5HeYC5Lx1jv7iLw1RdHNoA8Dw
JWlyOK+UvozphIvINWInxZyulRtj1vZ5RSn0p7AY6sU5Ydov78P/q2gPJwpiTKgZm7H3j3i8ULtPjz/OEKPt41QkzDcLWw6r79Z5S6jL7exuVUhcH2VATDMIoTNc/sQpbG5sxurQIbz54VdTNoTzBwrVEuSuIYq2f2AVXEzvsBmuZISLf4pjfMnvqGIwuLcLsqWMibUcc+yafhX08WLiWKD+ZBmH/JSL/AqCXiFwLYDGA34bXLMoHcUmK1S+wZQN647PDz06u0ReVuPQNWcI+Hvl+8wVRvjJNzH8EwD0ANgL477CS7ReG1SjKD3FJio1jkc+49A1Zrq0sxXvbD+HaytKom0JEOcQ0COsF4Dml1P8Fkotz9wJwIqyGUe6LS20kPeBpOHyiyxfbIJKn3fom7MRsJn537K0tjdja2Iy3tjRm9IYNIsptptORb8MKuhJ6AVgRfHPyC/N+4kGfCtIvtn6FOXUV9rQYp0E7xpwtIgqD6UjYWUqpY4kvlFLHRCTapJkcwJo/8RPENGCYU4lhT1NyGrRjcRm1JaLcYjoSdlxELk98ISLjAbSE06T8wb+uc5NXkrXp6Kfb88JO4Paz/ziO6MaxTXG0YGUdRs16AwtW1mX0fXl8iCymQdgDABaLyGoR+QOAl2AtSURdwDui4icu031xnBZ0u3DGsa1xbFMcPbmiFqdOt+HJFbUZfV8eHyKL0XSkUup9EbkQwGj7W1uVUq3hNYsoGqbTcWEv6BzHaUG36fM4tjWObYqjByZX4MkVtXhgckVG35fHh8jiWTFfRL6olPpPEfm7jh5XSv0mtJa5YMV8ioN8rHDOOycpgecCkbmuVMz/gr392w7+TQ2shURZxiufb3XtAVz/xCqsrj1gvD/THBk/+w4Kp88pIQ7Ticwro1zgGYQppb4rIgUA3lBK3eX4d3eG2kg5JFd+cXoFJPOW1WBrYzPmLasx3p/pRc2572zqz2xqa7bLh2WW4hAIEnVVp4n5Sqk2AP8zA22hPJAPvzj9rD9pelFz7jub+jPKtuZbABhEX3v1WRxGReMQCBJ1lWmdsBUi8h1Yd0UmfyKVUodCaRXlrHxIyJ1UMRBvPpheVXXTOlTOfYfZn0Hn/UR57E1r8uVKrpOzr/18rrjXMWTtNsoFpkHYfwOgANzv+P7wYJtDuS7uvzgXrKxL3i12/zUjo25Op8Lsz6ADlyiPvWkAGPfAw5Szr/18rnz4g4koaqZBWCWsAOzzsIKx1QCeCatRRFGNSOh1k7IhCAua3u+moylPrdiGJev34GhLK578ymWu+4tyZMk0AMzVwMPP54r7H0xEucC0WOsLAMYA+BGA/4AVlL0QVqOIosofemByBXoUFmS8blJc6P3uzPvxc0yyKWcNCD7XKS65aHHI4QpaXPqWqCtMR8LGKqUqta9XisiWMBpEBEQ3InH/NSONRsDiMsITNK9+d3vslvFlqNnbjFvGl6W1v3zgZxpwde0BzFtWg9lTx2BSRXq5hfkkV6aOKb+ZjoT9WUSuTHwhIp8FwIqpIfLzV14u/WUY97/coxrhCfsYe/W722Mv/HEHtjY244U/7khrf/nAzx18fkqc5CPeHUm5wDQIGw/gjyKyQ0R2AFgL4DMislFENoTWujyWD1M/2SyqC0Acj/Ff9h1N2ZrKpT8a3PgJQv2UOMlH+R7gU24wnY68IdRWZIlMTkH5mcbJl6mfrh6HII5jVEnLXsc4qinSCwf1Q8Phk7hwUL+0XqdPJ/UoLMBza+px98RyzJ5a2ckr0xeH6WPTNvgpcZKP4jJtG4dzi7KX0UiYUmqn17+wGxkXmRyF8PNXXrb/ZWg6MtLV4xDH0SRTzmOs99n85VuwaO1OzF8eTLqm6fGYNaUSMyYMxawp6QVP+mjic2vq0aaA50I6JnE45uarImzB8EeXY94y6zguWFmHUbPewIKVdb7fOxdHHeMybRuHc4uyl+l0JIE5CGF7asU2LFq7E0+t2Ob5vK4eh1w6jnq
ffXTgGAAkt0Hu24vf4F9/3d0Ty1EgwN1pHhM9uPAKNNyOeRDBiel6nqbnnTMg1cum+BW3QCGIfo/LtG0u/T6hzGMQloZsH2nKFV09DkEcxziOLMydNhajS4swd9rYQNq3v/lkytZNMBfUSmz/3pS0pyL14MIr0HA75kEEJ6YjMqbnnTMgDaJsyrWVpRhdWoRrK0t97yNIQfS7NW17VeR3kPK6QF1hmhNGFLqZk0ehX6/uWfEX5axXN+CPHx1C3f5m/PIbE1Iey2Suit5n5SV9krlEc5Zu6vLt+3uPnEzZuomyVEBHOXKZzqOcPXVM8ngHYfbUypRg1LRsipe3tjRia2Mz3trSGHnQAuRP/ipRZzgSlgXCHnWJ46hOHHj1y8bdR1O2On1kJKqSEs4pEj/tuP/qkSju1R33X+0dAEQ5HROHUdG4jMh4iduUGUePiCwMwrJA2PkccckXiUM79GDFqz1zplaiuFd3zOlg+kzPVYnqMzkvcqb5Xbo1dQfR1NKKNXUH2z1mmgdlKohgNQ7nT1wx6CGKJ05HZoGwh+6vrSzFe9sPRZ4vEocpCn1qzas906uGYHrVkA73oZcYKBvQ23Uf2Uwf7fvs8LO7PB0ZxJRmHM4fIqJ0iFIq6jakpaqqSlVX52ax/qjqzTzw4gdYsn4Pbh53frsFmPNNXGr+BN0OP/vzeo2e91Y2oHeX2xpVvy+u3oX5y2swa8oY16CaiKgrRGSdUqqqw8cYhAXP7wUlkUw9Y8LQjCY453sQFpfASxfVuZBvxv3r79HU0oriXt2x/rvXRd0cIspBXkEYc8JCYJqb4syDiSp5dubkUZgxYShmTh6V0feNC7fjFWWRzLglUqcjm270uO+q4ehRWID7rhoedVOySjYdY6I4YxAWAtMLqPPiH1XybL4l7ZoGv1EWyXQ7Jtlw8cumBPl9R0/i1Ok27DvqXYaDUmXTMSaKMybmh8B0XUEmEkfDmQTudrwemFyBJ1fUdrlIZldvetCnS6OsyaXzqoWWTed1NrU1TthvRMHgSFiE4jIClQ2jK0HyGqnU++L+a0Zi2/wbu1QoUy+SmQ63UhmZrHy+uHoXxv3r77G4ele7NnlVidfP6yDOrTDPT9OfwTj+jETZpmzuN6I4YRAWobj8gsq3qQWvC0jQfeE3t0uv66XvI4igzvSx+ctr0NTSivnLrUBL7xvTdfv85kf62UeYgl4c3cnP74I49EtnsqGNRFHidGQagr6LLi5TS5xaOCPovjCdmjbdh2n7nNOFXufa/OVbsKJmP/Y0tWDhnZ9Jfn/WlDHJ8g3O99aXSPL6ubh4cH8U9+qOiwf392yvV/vicH4GvTi6k5/fBXHol85kQxuJosQSFWkIumxAHEsjRCmTay6Gyeu4mh7zrp4b1z+xClsbmzG6tAhvPniV5/6u+feVqD94AuUlvbHyO9ek3QavnwtnO8L6vGHQ29Rw+ITruRnEMY3j5yeiYLBERUCCLhsQRE5YXKY0g+CVZ+Qmjp/fawrGdHqmeschvLZ+D6p3HPLVBud0ode5NnfaWIwuLWoXQJkudeT1c2E6bRl0fmTQyyB5rQ9peky9nheX/FAiyixOR6YhiKmloMVlSjMI904qx/zlNbh3knmQG5fPr49keE3BmE7P6PlYfiq560snBfncjnj9XHR1335lchmkoJ9HRPmDI2FZLpuLejpt3H0ETS2t2Lj7iOfz9FEO089vuuC03xEUfZTDa1TD9M7BW8eXoUCsrWn7gh4VnDiyBMW9umPiyBLf7+W3TV39LEH8XAQ9OhX2aFccR4WJyBuDsDQ4b9mPw/5yaRrDT5Fb089vOtVpOgXnt+06r+mp1bUH0aasrelrgr4TbcE7dWhqacWCd9qvGODd9jMBr982dfWzOM+LMAOUuNwBGJd2EJE5TkemoatTRGHvL9u5TWs5k5b9TOvMnjommVjtZX/zyZStKT9T1XohV+dndGuv87ObToP6MWJgX9QfPIERA/u2e8z
rvfSA95k7xhu1ye8xNk1oD3raOsx+9ysu7SAicwzC0uC8Zb+r7rtqOJ5cUct16zphWuHei2lu0l8PtaRsu8rrjk9nzS/9M5YN6I3PDj8bZQN6p7zG+dmdfRNkXtysKZU4v7hXhxd1r2OgB5Cmx8rvMTYNroIOUMLsd7/imLNKRN4YhKVhetWQQEesTNety/fb1/1eQP3024WDitBwuAUXDiryfJ5pOQ19VMgZBHb0uRL/dwsunO8b5uiH34u6n2R8v5/D9HVBBygcdSKiIISaEyYiN4jIVhGpE5FHPJ53i4goEemwjkZcueWZPLz4Qwx7ZDkeXvyh5+v9LvQdtGxL6NXbG3Sl9Ts/NwyjS4tw5+eGeT7v4cUfYmtjc6fH2KtEg5635Mxhcjs35izdhK2NzZizdFO7fWQzv8vgRFXaIlf6nYiiFVoQJiKFAH4M4EYAlQBuE5HKDp5XBGAmgPfCaktY3JK4F69rSNm6Mf1FHvYdkHFP6HW2T//aK5HeT7+ZLgvUfPLTlK0br/pSXtzuojyv/1kAkNzqsi2Y9sN5LgT9md1+FkzvriUiSkeYI2FXAKhTSm1XSp0C8CKAaR08738D+AGA9DKhY2y6XVZguqO8gF9h/9XtJ1jJ5AXf2T7T9vrpN9N9P3bTRSju1R2P3XSR8b790gODc4us4CuxdVvoOwjOwCMOgYjz+GRqrU8/hYSJiDoTZhA2GIBee6HB/l6SiFwOYIhSanmI7QjNzMmjMGPCUMycPAoLVtZh1Kw3sGBlHR6ffil2fH8KHp9+adRNNOInWPFbysEPZ/v0r/VjEISGwyfw3vZDaDh8wvN506uGYP13rwvtrla9fIkeGIwqLUKPwgKMKrVy1vQgRH+es/yJVzkUPbjSnzdnyWZr6nPJZgBo97WbTAZrYa5ioQe4ppX/gxCHYJeIMiOyOmEiUgDghwC+bfDc+0SkWkSqDxyIzy8m/Rf2kytqcep0G55cURt1s/JK0KOEzpyrqDz22mY0tbTisdc2p3zGZ1dtx6nTbXh21XYAqUGI/jy9/AmQWg7FOYqpj/LMXbYFTS2tmLtsC4p7dweA5Pa84rOSW6+8PNNgzc9oqnPkK8xRYv293O5W7Qq3z+816pYPU85E+STMIGw3AH2YoMz+XkIRgLEA3hGRHQCuBPBaR8n5SqlnlVJVSqmqgQPjubDzA5Mr0KOwAA9MrvC9j6CLwYYp6BEoJ/1iM2/ZFgx/dDnmLdsS+nslamJ1VBsrk4rO6payTZg1ZQyKe3VPlklxC0Kcz9O/dgYy+ijPyIHWfkYO7IOmllMAkNyeW9QzudX34dyfM3hz42c0Nez8SLfVGPxOe/q5ccRr1C3u+ZtElJ4wS1S8D6BCRMphBV9fAfDVxINKqSMAkuuhiMg7AL6jlKoOsU2huf+akbj/mpFpvcZZQiHuxVud7Q2zJpFeouHn7+5EmwKeW1OP2VPb3dvhyq1ERft+34IVNfuxp6kFd35uGHYdaun07siwTb3kfDy3ph5TLzk/pb1+y6Q4X1fcqzsuHtwfAPDa+j3Y2tiM19bvwdBz+uCDXUcw9Jw+eGh8WUrB2JmTR6Ffr+4dltTQ/3/g2MnkNujyKqbnnen7Okt+uNX/8luSwquOmV6sV+dV4oOlMYhyS2gjYUqpTwH8E4A3AdQAeFkptVlE5orITWG9bzZx/lXrHL2ImyD+CjedTtFHIe6eWI4CAe5O88LjNtLi/Bwf7T+e3JreHekliCmjn727A23K2vrpd+d0pG7h6no0tbRi4Wprf/rdvPoIp/POTreSGs7RuAsH9UtuvdpuOprqZ71M0z5zTv25jbT5nfb0Grnzc66xNAZRbgm1WKtS6nUArzu+N8fluVeH2ZY4cv5VG3QxWFOmhUeD+CvctMK5PuIxe2plWiNgnXF+jrk3X5T8/Imcn658xqdWbMOS9XtwtKUVT37lMl/7aD2tklu3ERMvXqs7XFLWH1sbm3FJmTUSNn18GRava8D
08WW+RjgXV+9Kvtf0qiG4/qJBqN5xGNdfNAhVw84G0HF/mr6XV3/qj82cPCrtpYScy0OZLp1lSt+f6dJM+V6cmSifcAHvCHn9VZvJBFzT2++D+Cs87JwendtIi/Nz6CM+QY80OO90Mz2uiYr9Fw4q8hwxcduf192bS9fvSdl29W5e56ibPtLmtwirH34WdjdNuA9iFNj0pgLmfRHlDwZhMZXJX8T3TipHca/uuHdS+IFRJgNPPwGVaTV+L3rw5wxwTZPREyUoRpUWeQaubvvzavu0ceenbE1fp984oj/POY2uJ5ab9qFXEVavaUv9MT8BvvN93cpDXFtZitGlRcnRSD/nhmn7MvmHChFFi2tHxlQmE3A37j6CppZWbNx9JNIbAkynKrvKa7pHbwMAX+1JnUpNne7a33wyZeumpG9PFIi19TNF6DWFV73jcMpW53UM9BGvm8adn/I8/bzRR5f0/fUoLMBza+px98TydtPLzvPddIFsZ9+4Tf25cU71uq31qY9GdpTAb8L0OHIh7szh1C9FjSNhGeDnr+ZMTuM4/8qPSqZGALxGGfW+CKJfnMnte4+cTG69jt2itdYdoXpA2JFbxpdhdGkRbhlfZlzkU6/3BbiXZXDSR7y8njd/+RYsWrsT85dvSXnec2vqk3e5OpmunWnKdCT51+sasLWxGb+2b05wKw9x8eD+KXeUcrQqN3Dql6LGICwDwvxBD2LfzgtRpoS9GLMbrwvo0+98hK2NzXj6nY/S6hfTYHjutLEYXVqEudPGphw75+vdpgydteT0NuqFZvXgrD2VstWnNL2OgZ5n5lZZHki921R/nvMuV68+01/np4J8EEGS3r6n3q5FU0srnnq7tl37KHsxmKaoMQjLALcf9CCWJ8nmXyJeAWQQI3ym+9Cft7HhCAAkt6ZMg2F9qq5HYQEKxJqmc75+7faPU7YJc39rV7T/rVW49tgnnya3eqFZZwCpn2v6aJxXX3jxWrPy/mtGoLhXd9x/zYiU18yeWont35uSnIr0W0bChGmQ5Mw3098rtX2pgSt1LNsq+jOYpqgxCMsAtx/0IJbICeKXiH4hCuKXqGlw6RVAhnE3mtv39a+/efUI9CgswDevHpHWqgBun8XZn/qo0wtrrVpgL6zd0f712jVf78+R59oV7e1t357dkttZUyoxY8JQzJrSvpyHvpSQPhoHpB5/0353W7MSSM0x9NNnTmGu2+j8+dHfS5+OvuGi81AgwA0XndfpPk1/hnJxjUhO7xGlh4n5AdETPKt3HEqpm+TmvP5nof7gCZzX/6xA3tdvIKYnAs9ZuqnLyfFuyc1e7+tkemOC1+d324fz+/r2+TX1OHW6DfuOnkwrQdrtuV4J3GPP74cPdh3B2PP7tXv9zMkVmL+8BjMnV6T055jzipIV7a3nnalir+/DWd1+xLl9UP/xcYw4t0+7iuz660z7XX+es+0tp06nbNPtM6cg1m00/TnR+ybxR9JbWxrx2vo9aFPAK+saOq1ZZ5q0rx/XS8r2JGu1+S0VEges6E+UHo6EdYGen6P/BehVrTyVOLbp8/uXp74eo2litinTkQvTnCCv55mWfNBHHZyjH/rXegDhfF+vdjy8+EMMe2Q5Hl78Ycr3nQndE0eWoLhXd0wcWYKHrhuN0aVFeOi60e32t6buIJpaWrGm7mBKfzpH5+Ys2YhFa3dizpKNKe371i/WYdHanfjWL9YBgOcomVu/e43UeI3A/vrPDSnbrtIT/Z3cjonXCKQp/Wfh1vFlKBDg1g5z7Nxf50U/rvqqBdmM03tE6WEQlgbnL3Y92NJ/8d531XD0KCzAfVcN99zf3qaTKdswaw856XeqmSZmm3LeEejGK4DUAwA/F1Dn/h/9zUZsbWzGo7/Z2O55er8ncqh+va7Bc9rSeazcLqLOJYL+bbmV0/Vvy7cYL1vjVUx2dd3Hya3evk17mwEgufVzXP3kYgHwtcyU17n/F/szJLY60ylnP/Q++92mfWh
TwO827UvrdV704zqixBrlS2yJKD9wOjIN+kLPC+/8jOvSMPuOnkxOaXnRl8vpaP8m/NYUutsuGXD3xHIcPPZJ2q8PgtfUhXMKzs0t48tQs7cZt4wv81wW5j//kgh02idW69NHowcVoWZfM0YPKmrXPr2mlOnSRPdOKsf85TXJQrhNLZ8mt17LEXW0WHZHBvXriX1HP8Ggfj1T2ru27iBqDxxHxUD/wbSz7ab8LDPlNYV3wTm90dDUggvOaR+gmE45m/anq64PWnta+PUrkucuEeUPjoSl4S/7mlO2+i37+l/epvWlnCNGHx04lrLtKq/RBf1OtXQS0INsg9eIgdcUnE4fTXKOmOn7/97fXYLRpUX43t9d0m4f+mjigq+Nx4wJQ7Hga+M932vrvqMAkNze+3lr9Ofez6deRJ1J6vfYz7vn8+WeJTDc+sZZouLb141Gca/u+PZ1o1NeM9R+3dAujGg62x7mnW9ePzO9exSmbHVu/eQ15ezHzC9VoLhXd8z8UoWv13emqyU5iCg7MQhLg76en5N+ITedZnKaPn4IehQWYPr4YKrWm07JBLGUkNvzTJeFcTJdz1Hvd69q9F5TpG77n/XqBixauxOzXt0AIDW/q86uhZXYOssvdNQ+r+eZemzpZjS1tOKxpZsBAP/2ujUl/m+vp04Zek3hOen5gV5tD/PON78/M6a6GkCa3vEZhCDumiai7MAgLA3XXzQIxb264/qLBrV7TL+Q+83TWrp+D06dbksurNxVQSTZ+ylZ4NUGv3lGbvR+X7fjEAAkt37oSeAbd1ujXImtnt9182WDASC5dVO94xBeW78H1R20yWuEzy1o6NG9IGXbdKI1ZZtwoT2Fe6HHVG6CWyX7oKvYe3HewKB/fr0Mh19dDSD9fnav4M/tscTd0l25a5qIsgODsDQ4k6zdmE59OEeFgl5IO4gk+64uOuxVh8k5teb8Ol2fnE7d+pEY6di4+wjmTK1Eca/umGOPWult32AXc01s3Rb+9rpT1usO0Ide+gCL1u7EQy99kPKaHoUFKdvxFxSnbBPu/NwwjC4twp2fG9aufU6mifRh3vm2YOVHaGppxYKVHwFIDZr06v/OUTvTEa6uBpB+P7vXTSVugWHvHt1Sttkk24q1EkWNQVgagg6S9AKagL8pj7Arywd94dWnBZ0Binlpj44VOLY6r8+4YGUdRs16AwtW1qXkX1cNOxs3jTsfVcPObtd2ZxkO/WKr/9+0tIHzguwM8hJGnNs3ZbvBHqXbsPtoSlDvzDfzGgnq6hRpEEbYxWcTWz1o0j/LT/5gjdr95A/t71j14u/u0I7LuKRDX9HAKZtXu3DDYq1E6WEQlga9dlM63EZ4zivumbId1O8s9CgswKB+3tMQ+sU2iF96fktA6EzboV/M9AWhAaRVi6kjP7j1EhT36o4f3No++V5vnx50AcCTK2px6nQbnlxRi+72CFP3DpYS8roQu+WjvbJul13k03t0z3lBHtCnR8o24dyininbByZXoEdhAR5wFHV1Xvz9LEaeyQRxZx0zt6CpuFe3lG3QgYx+jPVpWr8/Z/pUqulaqUFMv0YlFwNLojBl3095hullD/zSR3j0CvrOfJ5nV23HqdNteHbVdtx/zUjX/ekX22fusO7ii/qXnmmlbGdpB70/VtceRJuytn5MrxriukKB3r7rn1iVDLruv2YkHphcgSdX1OKByRV4/M2tAICGwy3tPtNDL63HB7uasLHhCJpaTqH+4AnMWboJK79zTcp6jPO/fEmyHEIiv+9IS/uREJ2z1MjN4wbjuTX1uHnc4JRz0FlqYWBRT/TuUYiBRT0xe+qYZMmTxAhY4kKuJ76XDehtVD3edOUDv1bXHki2d1LFQNdSK/pntqYka0JZwghILZWhl3G5/cqhANL/OdPbblpJXy+7km38lswhylcMwjqh/+L0W2vo1vFleG5NfbsRntrGYylbt7pjTvrFNohfel2uoYRgfvnqnytoevumjTs
fi9c1YNq48wEAGxqacOp0GzY0NGH4wD746MBxDB/YBw2HTyTreJWX9MEme5p40+4j+MLogag/eCK5aPbcaWM7bPs9nz9zIU/HS+9bI2gvvb8Lp063pVy89X7WA/z1370uGSyVDeidckydSzOZBANhHg/A3/JW5SV9Up5r+llMOZdj0qdo/ezfz5JQesDcWcFjIspunI7shD687pVX4pVU7jbC8+C1o9CjsAAPXmvdHTeo/1ko7XcWBnVyV5RpRfo4CrMmmSlnvtXvNjcmt3pdKeednPrxct4pqx8TfXr39iuH4mtXDk2OpJgqG9ArufWa4nFO6SZ41ckynfZ2nmduNx84meZPmS5v5TUtajr9ZXrTh+lyWab0fZjmpXFKjyh/MAgLiD4i4fzlPamiBAVibXX3XzMS2+bfmJx6DLp8g9fFS29jEHllphcsrwtR0J/frX3l9tIwie0NF5Umt3o+jjNI0I/XU2/XoqmlFU+9Xev5vn77tvmT1uTWq8/0gsGm9GnvdOifxetzmX5m0z8mvM4L08DGz00fQfxc+NkH118kyh+cjuyE6fI0+lSic4rkpWp7aql6l+cdaG7TP87cGZ1zqR6d13SP3kbTaRIvQUwLdXX6a3H1ruQxcAYlevvetEe+Ett/vnEMzu13VvLzJ6bxnFNfel+3nrbqYCS2+nt3NL2bbt9eOKgfGg6fxIWD+qX1OhOm095OHZ0nHX2uIM4nnX5eeJ3vXkw/s75//XP4fd+g+4KIcguDsIDoSeGJkgaJX7wXD+6HP350CBcPTr2gOn+xWyMD7UcFnMHUvGVbknlGznwhnVdQo18cnLlPfrhdbJyfccHKumQSvPPmA7fP70Xf/5ylm9DS2oY5Sze1C8L0Kbiis7rh6MlPUXSWdfr/4y/WYcveZlTvOITPjSjBz9/diR6FBbj9yqEpbdcD8hEDi9B49GOMGGgVQ3XmZunHQv+/HlADSP7fmSw/a0olzi/u1enF2zQ40N/X6wYGnTOodeb9uQXbQSdnlw3ojc8OPzvZR36CfWe5ETfO/SfeY87STb7e16sv/AZ2RJQ7OB3ZCT2HyXR6zzmdMP/Ll2DGhKGY/+XU0gmmUxXOaTH91nmv/BHTpXpMpwG9Pr/bFIrzM+rlIIKg51+dbmsDgORWp0/BjbWD4cS2xl7ap2ZvMxbadagW/sF7ys1ZKsItN8tJX5Lm0d9swNbGZjz6mw3t3isRGDccPmH8+b34mer1W7ct6IKdet/4zZfS9+HVPrf9+ynxAXj3BWtqERGDsE6YBitev1DdAhTnUi1unMGUXuE8iPwR0wRpPxdy58VLr2vl5Jb4bVol/ew+PVO2Or0G2a5DLQCQ3F5kL+1z0XlFKQVfnRfkiSNLUNyrOyaOLGl3g4Gem+V14U3cTWltz5SGdb7Xd17+EFsbm/Gdlz/srIuN+Ck0fN9Vw9GjsAD3XTU8rfcKOrhoOXU6ufV7vuv96+dn1e/all7vxQR8IuJ0ZBpMp/dM6csgOaeHvKYqZk+tDLS6+b4jJ9F49CT2HWm/8HXq+6afs/XD32/F1sZm/PD3WzGpYiDuv2akaw00fSoIQPL/ie3CP9Rj9tTKlOdNHFmCd7YewMSRJe1qSOl9uGzDHrQpYNmGPdh39BMAwK7DVhC2yR4J27S3GfdqJSWcU0nO4+U2zeQ1ZaZPMzYcPuFaauTYqU9Ttm70/DOv3EF9NQbTJP59R0/i1Ok27DvqfV44XVtZmpze9mI6HfeKXfPslXUNeHz6pSmPeeUBuvHzs+o3t8vrdaypRUQcCUuD6fSeqWnjzkePwoJkvSpdJqcqTKed9NwcU5vsZXUSW9OpIP3/3QutEaPEVn9MD4ycx+f+n1dj0dqduP/n1ThiF8Q9cqIV9m6SW92Oj4+jTVlbp0vK+qds3fQoLECBnFnf0Y3en//ws2oMe2Q5/uFn1QCAf7x6JHoUFuAfrx7pWV7BdKTWz6iL3yk4t1Ej57E3Pcf729XxE1u
d6bmrv5efn1W/I3C805GIvDAIi9DS9Xtw6nRbsqq6LpNTFYmLbGcXWz9LEzlroZnmMOkXr88MGwAAya3+mNdU6l/2HUtuLxtaDAC4bGgx+vXqDgDJbc9CJLcbdzcBsLbOoOHVD3anbHV6vtyitTvRppAyqpeg96HeF3qtMgD47YfWufHbD/d4Bhp6G736wisYcMv18zsF53buOs8f03P8P756OUaXFuE/vnp5u8dMc/E49UdEccQgLEJ+L5pBS1xkO7vYml7I9Iutsxba/uZPUrZur9O53dgAeI9OXjiob3Kr76OPHXUltgPsHLIBfXqi5ZSV1N9yqq1de/raz+/bs7BdgKaPQnnlvbn1oV6rDAD+sq85ufUKNPQ2+i3i6zaCpudipeONjXvx4p924Y2Ne1O+7/zspue413S5aZ00jkgRURyJUirqNqSlqqpKVVdXZ+z9/OScZJugP6NXrs81j7+D+o+Po/ycPlj58NXGr/PzXm6POXOn7n3hfayo2Y/JY87FO1sP4NM2hW4Fgrce+kLK6/XnnV/cC4vW7sSMCUMxd9pYz3wsP22/8clVqNnXjDGDivDGA1f52ocpt7YPf3Q52hRQIMD2700x3t+oWW/g1Ok29CgswLb5N/pqk27cv/4eTS2tKO7VHeu/e12X90dElEkisk4pVdXRYxwJ64Tf2/RNeOVHPbz4Qwx7ZDkeXhzM3XFeTJdLMuVd4b0MPQoLML2q/eLEbq9z9pOeI6WPBDn7TC/zoE+5zfzln7G1sRkzf/lnAEDT8VPJ7ZcvGwwA+PJlg9u1R1+qyDmq42cUyquf/mWKNUr6L51MswUxwuPW9lsuL0vZmnIbCfQqceLFdMoxTEGX3SAiAhiEdSrMC4BzuktPzl5s3xGW2AYttQRE15cLMr1IeeXBue3D2U9zf7sFTS2tmPvbLSnJ484+0z+X/v9DLdYdh4lt9V+bktvqnYes/9tbnX4TQNjTW37zsYK0u+lEytaUcwo6we955mdppqCxphcRhYFBWCfCvAA4R1P05Ozp463Rh8Q2aPpFxbROmOn+vHjdEeqWtO/sp4vtuxMvLuuPp9/5CFsbm/H0Ox+16zO9Npb+vnoiPgAM7Ns9uU2t45XKq59Mg1DT55nWkAvTh/YC54mtKf0z6qNfQZxnUfF7pygRkRfWCYuQs07QDReV4nebG3HDRaV4fPql7WoiBUmvX+RcI7Gr+/Pyyz/9FadOt+GXf/pru5GS/c0nU7Zu5n/54mQe1E3/8QcAwMaGI/jlN65M6bM3N+9DU0sr3ty8Dxt3H8Gp0214Ye0OLPz6FSn1xMoG9MaBY0dQNqC353JBXiU6nGtxuuVpmS6741VDLlMuLeuPP350CJd2UpLDSf+M720/lBz9euaO8WmXOImLX69rwNbGZvx6XUPaNz4QEblhEOaQyfXcnO/1zB0d5u2FIuhCkab7O3riVMpWt9e++22v4y445yLqv3h3Z3J9xzl/W+m6MPNHB44lt8dO2sVPT36aMtU3qWIgtu23nrdt/zHPz6G3Y+bkUSnHTg9CvQIt02C1q4uZB+GbV4/Ex8dq8M2r2xfX9fo50T/jtZWlyc+RzrqP+v4BcI1FIspJnI50yGTuh5+6W2EL+72Ot55Obp3vdf/VI1Hcqzvud1z0N9rTYYmtvnamflOBM/F7+vgh1k0A44fgtisuQIEAt11xQbvpzcRIz6Vl/VOS/r36YtarG7Bo7U7MenUDAGDOko1YtHYn5izZ6FnKwzSXzG+5CTd+jqszL03vX9Olf/TPkU6tLrd6alFxLlNFRBQEjoQ5+F2eJMz3SmcEoauCfi/niMnAvj2w7+gpDOzbo90I16sfNKCppRWvftCQMgW33Q4cEtvRpX1Rs+8YRpf2bZfsnfj/mw8ObHcTQJsCVtcexOyplSmfTR/x+cYL1Tj5aRv+15JN+PvPDEnpC32JoJv+X3sa1F4JYHXdxynbuNGPqz465RXkOc9Pva+fuWN8ymO
6hxd/iMXrGjB9fFnK9HA6o6/6e89fbq0ZeuwT7yWcwsQlhogoDBwJc4hjUcdMVvsO4r30URfniMnHx1tTtrpEQJPYJgy3j0Viu+BrVZgxYSgWfK0qJeHeuUi1ngjulRSuj/icOm0Vaz11uq1dX+glL775hRHoUViAb35hBABg0shzkts43kmnfxZn4Oo2Sub8WdD70OvnJIg7e/X99+1p/a2Y2OriWDoijm0ionhiEBYh04t1JgPDIN5rxk/eswqZ/uS9dnf53TxucHJb0rcnCgQo6WtVrHcGNsk2DeyTstXpo136ItVA6pSe/n/nWox6gFJ+jpU0Xn5O73Z9oQcvzsWtbxo3GMW9uuOmcYN9BbJ+a2jpvC7+Xks9mZ6HplOk/c7qlrLtKq+pwDgGvHFsExHFE6cjI5TJqc9M2nW4Jbl96u1taGppxVNvb8P0qiFYUWPlF62oacTxT04n11icPbWyXWCT4BwJ0afWJlWUoHZ/MyZVlOD2K4cC6Lw/9QK806uGJEe4rq0sxSenrRUkElvdvZPKMX95De6dVI6qYWenvJe+z/XfvS7tqas5Szaj/uPjmLNkc7uVBEyZTiVbwdSZQCro8/DHt18e6E0FXlOBcfwZimObiCieOBIWoThOfQZBXwfxpL3uYGJ7+ERrcuusrK6PIOmjOs6REP15r6xrQJsCXlnXYNyfzgK8+gjXBWf3AoDkVqePtDnfq6tFfUec2ydl64ffqeSgz8MgbiowndLz0/awpwtz9eeaiILHICwgzAM545k7qrDj+1PwzB1VSAwoJbbdCyW5dVZW1y9e+pSO86Kmf20a/OjHx1mAV5+e693DGm1LbHVeBTurhp2Nm8adnxwhSzCdZpw1pRIzJgzFrCmV7dprKpcu/mFO6YU9XcjfBURkikFYQPIhD8Tr4uL22LGTrSnbsef3S24XrKzDqFlvYMHKunb7Mx3VMV3RwOv4mI7ceC0l5LZ/06V6nAFUHM+nTAYXYd6MEsS+71j4LoY9shx3LHy33WNxPHZEFE/MCQtIPuSBeOUcuT1WUFAAtLVZWwBDz+mDD3YdwdBz+uDJFbU4dboNT66obVc93ysPaMHKOjy5ohYPTK5o9zo3QRwfr324Pea36Oq1laXJPLW4yGSplDBLQgSxb6+SJPnwu4CIgsEgLCD5UEfI6+LiFjQU9+6GxqOnUNzbOtVuGV+Gmr3NuGV8GUaVFiWDqXR4BW9uTI+PVzkEr324PeZMgjflrOofBwwuzpg08hysrvs4WZpElw+/C4goGAzCyJjXxcU5VZcY/fn36eNSRoL0582dNtY4iNI9MLkiGbwFvcyUXpA1SnEMeBhcnPGze6+MuglElANCzQkTkRtEZKuI1InIIx08/pCIbBGRDSLytogMDbM9FB49aV3Pg3LmW3klt5vmHOkJ/bmaf9PVJPtsSw531m4jIsoHoQVhIlII4McAbgRQCeA2Eal0PO0DAFVKqUsAvALg/4TVHuo6rwu7PsJlWp3eyU9AFXQCd64Eddn2OfQ6a0D2BZFERH6EOR15BYA6pdR2ABCRFwFMA7Al8QSl1Ert+e8C+FqI7aEucq71qNOnz8pL+iTzoJzThX6S270EPUUWx2lAP7Ltc8yaMgbzl9ckS41k8iYAIqKohDkdORiAPrfQYH/PzT0A3gixPeTCdCoosYByRwspu02fOUdkvKbZ4lh40897BbEEUVdlW80wZ6mRTK6XSpQOjtJSkGJRJ0xEvgagCsDjLo/fJyLVIlJ94EB0F7ZcpU8Fef2C8bpz0E3YF9Ogp9289mf6Xqa1wchd3IJIXngpIdum+inewpyO3A1Ar6BZZn8vhYhMBjALwBeUUp90tCOl1LMAngWAqqqq9ov6UZfoU0Fe00BxuXNQF/S0WxDTpfoak5QbOD1KCdk21U/xJkqFE9OISDcA2wB8CVbw9T6AryqlNmvPuQxWQv4NSqlak/1WVVWp6urqEFqcv/S8LQC+Sj64lYqYs3Q
TFq3diRkThubNxSvozxx0GY64v28csS+IyC8RWaeUqurosdBGwpRSn4rIPwF4E0AhgOeUUptFZC6AaqXUa7CmH/sCWCwiAPBXpdRNYbWJOub8K98kcHBelNxGCvLxr8agP7Pet3dNLM9YMMDRnzNYI42IwhBqsVal1OsAXnd8b472/8lhvj+Z8RM0OC/QbhXz8/HiFeYdm5kMjPIxgCYiyqRYJOaTf34ThvXXmSZB669xFl31qv9FXaMfn0zeNRi35HgiolzDZYuynN+RET+v018DIGVtQ46aZEY+jiwSEeUqBmFZzm/w4+d1Hb0m8X8GB2cwiZuIiEyEdndkWHh3JMVdPt4RSkREHYvk7kiifMWpWSIiMsHE/BzGKt/RYEI7ERGZYBCWw0yX18iVYG3esi0Y/uhyzFu2pfMnB+yOhe9i2CPLccfCdzP+3kRElJ0YhOUw03IGubIW2nNr6tGmrG2mra77OGVLRETUGQZhOcx0WiyTtafCdPfEchSItc20SSPPSdkm5MooIxERBY93RxIFgGtnEhFRR3h3JFHIuHYmERGli0EY5b0giqu6BVssYktERG4YhFHeC2JRbAZbRESULgZhlPc4ZUhERFFgEEZ5j6NYREQUBZaoICIiIooAgzAKBOthERERpYdBGAUijlX3GRgSEVGcMSeMAhHH5PYg7nokIiIKC4MwCkQck9vjGBgSERElcDqScpbp2pk6TmESEVGmMAgj0sQxty0qDEiJiMLF6UgiDacwz2BOHRFRuBiEEWnimNsWFQakREThYhBGRB1iQEpEFC7mhBERERFFgEEYERERUQQYhBERERFFgEEYERERUQQYhBERERFFgEEYERERUQQYhBERERFFgEEYERERUQQYhBERERFFgEEYERERUQREKRV1G9IiIgcA7Iy6HQEqAXAw6kbECPvjDPbFGeyLM9gXZ7AvzmBfpIpTfwxVSg3s6IGsC8JyjYhUK6Wqom5HXLA/zmBfnMG+OIN9cQb74gz2Raps6Q9ORxIRERFFgEEYERERUQQYhEXv2agbEDPsjzPYF2ewL85gX5zBvjiDfZEqK/qDOWFEREREEeBIGBEREVEEGIRlkIgMEZGVIrJFRDaLyEz7+2eLyFsiUmtvB0Td1kwRkUIR+UBEltlfl4vIeyJSJyIviUiPqNuYCSJSLCKviMhfRKRGRCbk63khIg/aPx+bRORXInJWPp0XIvKciOwXkU3a9zo8F8TyI7tfNojI5dG1PHguffG4/XOyQUReFZFi7bFH7b7YKiLXR9LokHTUF9pj3xYRJSIl9td5d17Y3/+WfW5sFpH/o30/tucFg7DM+hTAt5VSlQCuBPCPIlIJ4BEAbyulKgC8bX+dL2YCqNG+/gGAJ5RSIwEcBnBPJK3KvKcA/E4pdSGAS2H1Sd6dFyIyGMD/AFCllBoLoBDAV5Bf58VPAdzg+J7buXAjgAr7330Ans5QGzPlp2jfF28BGKuUugTANgCPAoD9u/QrAC6yX7NARAoz19TQ/RTt+wIiMgTAdQD+qn07784LEbkGwDQAlyqlLgLw7/b3Y31eMAjLIKXUXqXUn+3/N8O60A6GdeK8YD/tBQA3R9LADBORMgBTACy0vxYAXwTwiv2UvOgLEekP4CoAPwEApdQppVQT8vS8ANANQC8R6QagN4C9yKPzQim1CsAhx7fdzoVpABYpy7sAikXkvIw0NAM66gul1O+VUp/aX74LoMz+/zQALyqlPlFK1QOoA3BFxhobMpfzAgCeAPA/AegJ3nl3XgD4JoDvK6U+sZ+z3/5+rM8LBmEREZFhAC4D8B6AUqXUXvuhfQBKo2pXhj0J65dHm/31OQCatF+wDbCC1FxXDuAAgOftqdmFItIHeXheKKV2w/oL9q+wgq8jANYhP88Lndu5MBjALu15+dY3dwN4w/5/3vWFiEwDsFsp9aHjobzrCwCjAEyy0xb+S0Q+Y38/1n3BICwCItIXwK8BPKCUOqo/pqz
bVXP+llURmQpgv1JqXdRtiYFuAC4H8LRS6jIAx+GYesyj82IArL9cywGcD6APOpiCyWf5ci50RkRmwUrx+EXUbYmCiPQG8C8A5kTdlpjoBuBsWKk+DwN42Z5diTUGYRkmIt1hBWC/UEr9xv52Y2Ko2N7ud3t9DpkI4CYR2QHgRVjTTU/BGjbvZj+nDMDuaJqXUQ0AGpRS79lfvwIrKMvH82IygHql1AGlVCuA38A6V/LxvNC5nQu7AQzRnpcXfSMiXwcwFcDt6kydpXzrixGw/lj50P49WgbgzyIyCPnXF4D1e/Q39hTsn2DNsJQg5n3BICyD7Kj8JwBqlFI/1B56DcCd9v/vBLA0023LNKXUo0qpMqXUMFhJk/+plLodwEoAt9pPy5e+2Adgl4iMtr/1JQBbkIfnBaxpyCtFpLf985Loi7w7LxzczoXXAMyw74a7EsARbdoyJ4nIDbDSGG5SSp3QHnoNwFdEpKeIlMNKSv9TFG3MBKXURqXUuUqpYfbv0QYAl9u/T/LuvACwBMA1ACAiowD0gLWAd7zPC6UU/2XoH4DPw5pG2ABgvf3vb2DlQr0NoBbACgBnR93WDPfL1QCW2f8fDusHpA7AYgA9o25fhvpgHIBq+9xYAmBAvp4XAP4VwF8AbALwMwA98+m8APArWPlwrbAurPe4nQsABMCPAXwEYCOsu0oj/wwh90UdrByfxO/QZ7Tnz7L7YiuAG6Nuf9h94Xh8B4CSPD4vegD4uf17488AvpgN5wUr5hMRERFFgNORRERERBFgEEZEREQUAQZhRERERBFgEEZEREQUAQZhRERERBFgEEZEOU9ErhYRJSL3at8bZ3/vO5289mZ7EWAiokAxCCOinKZV2t8E4O+1h24D4FxzryM3A+gwCNP2TUSUNv4CIaKsYC9q/jKsZUcKAfxvWAt8PwngBIA/ABiulJoqIo/BWtZlOKwq/P8fgJ0A+olIKaxlf24A8Lq2/xGwClwOtPf3DVhr0d0E4AsiMhvALbBWvVgPq/jyr0TkHQA/BNAXVoXuryul9orI/wDwD7DWN9yilPpKCN1CRFmMQRgRZYsbAOxRSk0BABHpD2t064uwqqi/5Hh+JYDPK6VaRORq+3uvAJgO4ANYVbU/0Z7/LIB/UErVishnASxQSn1RRF6DtaLDK/b7AkAPpVSVvRbsfwGYppQ6ICL/DcB8AHfDWoS9XCn1iYgUB9gPRJQjGIQRUbbYCOD/EZEfAFgGoBnWYt+1ACAiPwdwn/b815RSLY59vAwrWLsQ1tInn7Nf29f+/2I7yAKs5ZLcJAK+0QDGAnjLfl0hrOVUAGsJql+IyBJYS1EREaVgEEZEWUEptU1ELoe13uo8WGspejnewT72iUgrgGsBzIQdhMHKj21SSo0zbE5i3wJgs1JqQgfPmQLgKgB/C2CWiFyslPrUcP9ElAeYmE9EWUFEzgdwQin1cwCPwwqghtm5XICVaG9iDoB/VkqdTnxDKXUUQL2ITLffS0TkUvvhZgBFLvvaCmCgiEywX9ddRC4SkQIAQ5RSKwH8M4D+sHLGiIiSOBJGRNniYgCPi0gbgFYA3wRQAmC5iJwAsBruwVKSUuqPLg/dDuBpOwG/O4AXYd09+SKA/2sn2t/q2NcpEbkVwI/sHLVusG4U2Abg5/b3BMCPlFJN6X1cIsp1opSKug1ERF1mJ99/Ryk1NeKmEBEZ4XQkERERUQQ4EkZEREQUAY6EEREREUWAQRgRERFRBBiEEREREUWAQRgRERFRBBiEEREREUWAQRgRERFRBP5/ojTffhND43kAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x='sqrMetres',y='price', data = mieszkania_train, linewidth = 0, s = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pytanie- Jaki jest baseline naszego systemu?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Czym jest regresja liniowa?- przypadek jednowymiarowy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![regresja liniowa 1](obrazki/1.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![regresja liniowa 2](obrazki/2.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![regresja liniowa 3](obrazki/3.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![regresja liniowa 4](obrazki/4.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## wzór na regresję w przypadku jednowymiarowym?\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$Y = a*X_1 + b$\n", + "\n", + "$Y = w_1 * X_1 + w_0$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie - napisać funkcję predict_price(sqr_metres), która zwraca cenę mieszkania zgodnie z modelem regresji liniowej ( 5 minut) \n", + "\n", + "Należy samemu wymyślić współczynniki modelu" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def predict_price(sqr_metres):\n", + " return 2000* sqr_metres + 200000" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "240000" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predict_price(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ 
+ { + "data": { + "text/plain": [ + "280000" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predict_price(40)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "310000" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predict_price(55)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "200000" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predict_price(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "Y_train_predicted = predict_price(X_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mierzenie błędu" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![RMSE 1](obrazki/6.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![a](obrazki/5.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie - napisać funkcję, która liczy błąd średniokwadratowy na całym zbiorze (7 minut)\n", + "\n", + "rmse(Y_true, Y_predicted)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "def rmse(Y_true, Y_predicted):\n", + " return np.sqrt(np.sum((Y_true - Y_predicted)**2)/ len(Y_true)) " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(np.array([300_000, 250_000]), np.array([300_000, 250_000]))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/plain": [ + "70799.01129253148" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(np.array([305_000, 250_000]) ,np.array([300_000, 350_000]) )" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "80622.57748298549" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(np.array([300_000, 250_000]), np.array([330_000, 360_000]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie - za pomocą rmse policzyć błąd dla baseline (3 minuty)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([357461.18628244, 357461.18628244, 357461.18628244, ...,\n", + " 357461.18628244, 357461.18628244, 357461.18628244])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.ones_like(Y_train) * Y_train.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "125698.71268014389" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(Y_train, np.ones_like(Y_train) * Y_train.mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie - za pomocą rmse policzyć błąd dla predykcji (2 minuty)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "123420.02227684396" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(Y_train, Y_train_predicted)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Na jakim zbiorze najlepiej sprawdzać wyniki?\n" + ] 
+ }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![RMSE 2](obrazki/7.png)\n", + "\n", + "![RMSE 3](obrazki/8.png)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "mieszkania_dev = pd.read_csv(DATA_DIR/'dev-0'/'in.tsv', sep = '\\t', names = names)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "with open(DATA_DIR/'dev-0'/'expected.tsv','r') as dev_exp_f:\n", + " Y_dev = np.array([float(x.rstrip('\\n')) for x in dev_exp_f.readlines()])" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "mieszkania_dev['price'] = Y_dev" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "X_dev = mieszkania_dev['sqrMetres'].to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x='sqrMetres',y='price', data = mieszkania_dev, linewidth = 0, s = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Zadanie - policzyć rmse dla predykcji ze zbioru deweloperskiego modelu baseline i naszego modelu regresji liniowej" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "Y_dev_predicted = predict_price(X_dev)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "117309.3154367544" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(Y_dev, np.ones_like(Y_dev) * Y_dev.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "104227.56492755697" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(Y_dev, Y_dev_predicted)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Przypadek wielowymiarowy" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x='floor',y='price', data = mieszkania_train, linewidth = 0, s = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$Y = w_1 * X_1 + w_2 * X_2 + w_3 * X_3 + w_0$" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie. Napisać analogiczną funkcję predict_price(sqr_metres, floor), policzyć rmse dla takiego modelu ( 7 minut)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "def predict_price(sqr_metres, floor):\n", + " return 4000* sqr_metres + (-1000)* floor + 100000" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "298000" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predict_price(50, 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "295000" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predict_price(50, 5)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "Y_dev_predicted = predict_price(mieszkania_dev['sqrMetres'], mieszkania_dev['floor'])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "117436.43511182851" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(Y_dev, np.mean(Y_train))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100227.89896326358" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "rmse(Y_dev, Y_dev_predicted)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## jak dobrać najlepsze parametry?" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.lmplot(x='sqrMetres',y='price', data = mieszkania_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "lm_model = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression()" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.fit(mieszkania_train[['isNew','rooms', 'floor', 'sqrMetres']], Y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "Y_train_predicted = lm_model.predict(mieszkania_train[['isNew','rooms', 'floor', 'sqrMetres']])" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "103308.92502763818" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(Y_train, Y_train_predicted)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "Y_dev_predicted = lm_model.predict(mieszkania_dev[['isNew','rooms', 'floor', 'sqrMetres']])" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "84157.87889057388" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rmse(Y_dev, Y_dev_predicted)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "/home/kuba/anaconda3/envs/zajeciaei/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "array([469449.27836213])" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.predict(np.array(([[0, 4, 3, 70]])))" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/kuba/anaconda3/envs/zajeciaei/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "text/plain": [ + "array([455982.54297977])" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.predict(np.array(([[0, 4, 3, 60]])))" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 4522.65059749, 73763.4125433 , -78.83243119, 1346.67353824])" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.coef_" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "80364.9778059895" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lm_model.intercept_" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "455982.5429800203" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "0 * 
4522.65059749 + 4* 73763.4125433 + 3 * (-78.83243119) + 60 * 1346.67353824 + 80364.97780599032" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "with open(DATA_DIR/'dev-0'/'out.tsv','w') as f_out_file:\n", + " for line in Y_dev_predicted:\n", + " f_out_file.write(str(line))\n", + " f_out_file.write('\\n')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Uwaga - regresja liniowa działa dobrze tylko dla danych, gdzie występuje korelacja liniowa" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![RMSE 5](obrazki/9.png)\n", + "\n", + "![6](obrazki/10.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zadanie domowe\n", + "\n", + "\n", + "- https://gonito.net/challenge/retroc2\n", + "- termin 17.05\n", + "- należy użyć wektoryzacji (np. tf-idf)\n", + "- wynik zaliczający to max 50 RMSE dla dev-0 \n", + "- punkty: 60, dla 3 najlepszych wyników na test-A: 80,\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "author": "Jakub Pokrywka", + "email": "kubapok@wmi.amu.edu.pl", + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "lang": "pl", + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + }, + "subtitle": "7.Regresja liniowa[ćwiczenia]", + "title": "Ekstrakcja informacji", + "year": "2021" + }, + "nbformat": 4, + "nbformat_minor": 4 +}