{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n", "
\n", "

Ekstrakcja informacji

\n", "

7. Regresja liniowa [ćwiczenia]

\n", "

Jakub Pokrywka (2021)

\n", "
\n", "\n", "![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Regresja liniowa" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## import bibliotek" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from pathlib import Path\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from sklearn.linear_model import LinearRegression\n", "plt.rcParams['figure.figsize'] = [10, 5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zbiór \n", "\n", "https://git.wmi.amu.edu.pl/kubapok/mieszkania2-below1m-public" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## ładowanie zbioru train" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "DATA_DIR = Path('/home/kuba/Syncthing/przedmioty/2020-02/ISI/zajecia7_regresja_liniowa/mieszkania2')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "with open(DATA_DIR / 'names') as f_names:\n", " names = f_names.read().rstrip('\\n').split('\\t')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "mieszkania_train = pd.read_csv(DATA_DIR/'train/in.tsv', sep ='\\t', names=names)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isNewroomsfloorlocationsqrMetres
0False31Centrum78
1False32Sołacz62
2False30Sołacz15
3False40Sołacz14
4False30Sołacz15
\n", "
" ], "text/plain": [ " isNew rooms floor location sqrMetres\n", "0 False 3 1 Centrum 78\n", "1 False 3 2 Sołacz 62\n", "2 False 3 0 Sołacz 15\n", "3 False 4 0 Sołacz 14\n", "4 False 3 0 Sołacz 15" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mieszkania_train.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "with open(DATA_DIR/'train'/'expected.tsv','r') as train_exp_f:\n", " Y_train = np.array([float(x.rstrip('\\n')) for x in train_exp_f.readlines()])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([476118., 459531., 411557., ..., 320000., 364000., 209000.])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Y_train" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "mieszkania_train['price'] = Y_train" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "X_train = mieszkania_train['sqrMetres'].to_numpy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Wizualizacja danych" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
isNewroomsfloorlocationsqrMetresprice
0False31Centrum78476118.0
1False32Sołacz62459531.0
2False30Sołacz15411557.0
3False40Sołacz14496416.0
4False30Sołacz15406032.0
.....................
1652True20Grunwald51299000.0
1653True22Centrum53339000.0
1654True34Stare65320000.0
1655True31Nowe67364000.0
1656True33Grunwald50209000.0
\n", "

1657 rows × 6 columns

\n", "
" ], "text/plain": [ " isNew rooms floor location sqrMetres price\n", "0 False 3 1 Centrum 78 476118.0\n", "1 False 3 2 Sołacz 62 459531.0\n", "2 False 3 0 Sołacz 15 411557.0\n", "3 False 4 0 Sołacz 14 496416.0\n", "4 False 3 0 Sołacz 15 406032.0\n", "... ... ... ... ... ... ...\n", "1652 True 2 0 Grunwald 51 299000.0\n", "1653 True 2 2 Centrum 53 339000.0\n", "1654 True 3 4 Stare 65 320000.0\n", "1655 True 3 1 Nowe 67 364000.0\n", "1656 True 3 3 Grunwald 50 209000.0\n", "\n", "[1657 rows x 6 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mieszkania_train" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.scatterplot(x='sqrMetres',y='price', data = mieszkania_train, linewidth = 0, s = 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Pytanie- Jaki jest baseline naszego systemu?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Czym jest regresja liniowa?- przypadek jednowymiarowy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![regresja liniowa 1](obrazki/1.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![regresja liniowa 2](obrazki/2.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![regresja liniowa 3](obrazki/3.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![regresja liniowa 4](obrazki/4.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## wzór na regresję w przypadku jednowymiarowym?\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$Y = a*X_1 + b$\n", "\n", "$Y = w_1 * X_1 + w_0$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zadanie - napisać funkcję predict_price(sqr_metres), która zwraca cenę mieszkania zgodnie z modelem regresji liniowej (5 minut) \n", "\n", "Należy samemu wymyślić współczynniki modelu" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def predict_price(sqr_metres):\n", " return 2000* sqr_metres + 200000" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "240000" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict_price(20)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "280000" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict_price(40)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ 
{ "data": { "text/plain": [ "310000" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict_price(55)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "200000" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict_price(0)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "Y_train_predicted = predict_price(X_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Mierzenie błędu" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![RMSE 1](obrazki/6.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![a](obrazki/5.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zadanie - napisać funkcję, która liczy błąd średniokwadratowy na całym zbiorze (7 minut)\n", "\n", "rmse(Y_true, Y_predicted)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "def rmse(Y_true, Y_predicted):\n", " return np.sqrt(np.sum((Y_true - Y_predicted)**2)/ len(Y_true)) " ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(np.array([300_000, 250_000]), np.array([300_000, 250_000]))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "70799.01129253148" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(np.array([305_000, 250_000]) ,np.array([300_000, 350_000]) )" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "80622.57748298549" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(np.array([300_000, 250_000]), np.array([330_000, 
360_000]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zadanie - za pomocą rmse policzyć błąd dla baseline (3 minuty)\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([357461.18628244, 357461.18628244, 357461.18628244, ...,\n", " 357461.18628244, 357461.18628244, 357461.18628244])" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.ones_like(Y_train) * Y_train.mean()" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "125698.71268014389" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(Y_train, np.ones_like(Y_train) * Y_train.mean())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zadanie - za pomocą rmse policzyć błąd dla predykcji (2 minuty)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "123420.02227684396" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(Y_train, Y_train_predicted)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Na jakim zbiorze najlepiej sprawdzać wyniki?\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![RMSE 2](obrazki/7.png)\n", "\n", "![RMSE 3](obrazki/8.png)\n", "\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "mieszkania_dev = pd.read_csv(DATA_DIR/'dev-0'/'in.tsv', sep = '\\t', names = names)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "with open(DATA_DIR/'dev-0'/'expected.tsv','r') as dev_exp_f:\n", " Y_dev = np.array([float(x.rstrip('\\n')) for x in dev_exp_f.readlines()])" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "mieszkania_dev['price'] = Y_dev" ] }, { "cell_type": "code", 
"execution_count": 28, "metadata": {}, "outputs": [], "source": [ "X_dev = mieszkania_dev['sqrMetres'].to_numpy()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.scatterplot(x='sqrMetres',y='price', data = mieszkania_dev, linewidth = 0, s = 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Zadanie - policzyć rmse dla predykcji ze zbioru deweloperskiego modelu baseline i naszego modelu regresji liniowej" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "Y_dev_predicted = predict_price(X_dev)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "117309.3154367544" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(Y_dev, np.ones_like(Y_dev) * Y_dev.mean())" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "104227.56492755697" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(Y_dev, Y_dev_predicted)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Przypadek wielowymiarowy" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.scatterplot(x='floor',y='price', data = mieszkania_train, linewidth = 0, s = 5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$Y = w_1 * X_1 + w_2 * X_2 + w_3 * X_3 + w_0$" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zadanie. Napisać analogiczną funkcję predict_price(sqr_metres, floor), policzyć rmse dla takiego modelu ( 7 minut)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "def predict_price(sqr_metres, floor):\n", " return 4000* sqr_metres + (-1000)* floor + 100000" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "298000" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict_price(50, 2)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "295000" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predict_price(50, 5)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "Y_dev_predicted = predict_price(mieszkania_dev['sqrMetres'], mieszkania_dev['floor'])" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "117436.43511182851" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(Y_dev, np.mean(Y_train))" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "100227.89896326358" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(Y_dev, Y_dev_predicted)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## jak dobrać najlepsze parametry?" 
] }, { "cell_type": "code", "execution_count": 40, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.lmplot(x='sqrMetres',y='price', data = mieszkania_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "lm_model = LinearRegression()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "LinearRegression()" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_model.fit(mieszkania_train[['isNew','rooms', 'floor', 'sqrMetres']], Y_train)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "Y_train_predicted = lm_model.predict(mieszkania_train[['isNew','rooms', 'floor', 'sqrMetres']])" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "103308.92502763818" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(Y_train, Y_train_predicted)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "Y_dev_predicted = lm_model.predict(mieszkania_dev[['isNew','rooms', 'floor', 'sqrMetres']])" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "84157.87889057388" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse(Y_dev, Y_dev_predicted)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/kuba/anaconda3/envs/zajeciaei/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", " 
warnings.warn(\n" ] }, { "data": { "text/plain": [ "array([469449.27836213])" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_model.predict(np.array(([[0, 4, 3, 70]])))" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/kuba/anaconda3/envs/zajeciaei/lib/python3.10/site-packages/sklearn/base.py:450: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "array([455982.54297977])" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_model.predict(np.array(([[0, 4, 3, 60]])))" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 4522.65059749, 73763.4125433 , -78.83243119, 1346.67353824])" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_model.coef_" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "80364.9778059895" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm_model.intercept_" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "455982.5429800203" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "0 * 4522.65059749 + 4* 73763.4125433 + 3 * (-78.83243119) + 60 * 1346.67353824 + 80364.97780599032" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "with open(DATA_DIR/'dev-0'/'out.tsv','w') as f_out_file:\n", " for line in Y_dev_predicted:\n", " f_out_file.write(str(line))\n", " f_out_file.write('\\n')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Uwaga - regresja liniowa działa dobrze tylko 
dla danych, gdzie występuje korelacja liniowa" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![RMSE 5](obrazki/9.png)\n", "\n", "![6](obrazki/10.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zadanie domowe\n", "\n", "\n", "- https://gonito.net/challenge/retroc2\n", "- termin 17.05\n", "- należy użyć wektoryzacji (np. tf-idf)\n", "- wynik zaliczający to max 50 RMSE dla dev-0 \n", "- punkty: 60, dla 3 najlepszych wyników na test-A: 80,\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "author": "Jakub Pokrywka", "email": "kubapok@wmi.amu.edu.pl", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "lang": "pl", "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" }, "subtitle": "7.Regresja liniowa[ćwiczenia]", "title": "Ekstrakcja informacji", "year": "2021" }, "nbformat": 4, "nbformat_minor": 4 }