{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Regresja liniowa" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import bibliotek" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Standard library first, then third-party packages.\n", "import os\n", "from pathlib import Path\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "from sklearn.linear_model import LinearRegression\n", "\n", "# Default size for every matplotlib figure created in this notebook.\n", "plt.rcParams['figure.figsize'] = [10, 5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Zbiór\n", "\n", "https://git.wmi.amu.edu.pl/kubapok/mieszkania2-below1m-public" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Ładowanie zbioru train" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Dataset root directory. Set the MIESZKANIA_DATA_DIR environment variable to\n", "# point at a local checkout; the fallback is the original author's local path.\n", "DATA_DIR = Path(os.environ.get(\n", "    'MIESZKANIA_DATA_DIR',\n", "    '/home/kuba/Syncthing/przedmioty/2020-02/ISI/zajecia7_regresja_liniowa/mieszkania2',\n", "))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# The `names` file holds the tab-separated column names used for train/in.tsv.\n", "# An explicit encoding keeps the read independent of the platform locale.\n", "with open(DATA_DIR / 'names', encoding='utf-8') as f_names:\n", "    names = f_names.read().rstrip('\\n').split('\\t')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# in.tsv is read as tab-separated with column labels supplied via `names`.\n", "mieszkania_train = pd.read_csv(DATA_DIR / 'train' / 'in.tsv', sep='\\t', names=names)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | isNew | \n", "rooms | \n", "floor | \n", "location | \n", "sqrMetres | \n", "
---|---|---|---|---|---|
0 | \n", "False | \n", "3 | \n", "1 | \n", "Centrum | \n", "78 | \n", "
1 | \n", "False | \n", "3 | \n", "2 | \n", "Sołacz | \n", "62 | \n", "
2 | \n", "False | \n", "3 | \n", "0 | \n", "Sołacz | \n", "15 | \n", "
3 | \n", "False | \n", "4 | \n", "0 | \n", "Sołacz | \n", "14 | \n", "
4 | \n", "False | \n", "3 | \n", "0 | \n", "Sołacz | \n", "15 | \n", "
\n", " | isNew | \n", "rooms | \n", "floor | \n", "location | \n", "sqrMetres | \n", "price | \n", "
---|---|---|---|---|---|---|
0 | \n", "False | \n", "3 | \n", "1 | \n", "Centrum | \n", "78 | \n", "476118.0 | \n", "
1 | \n", "False | \n", "3 | \n", "2 | \n", "Sołacz | \n", "62 | \n", "459531.0 | \n", "
2 | \n", "False | \n", "3 | \n", "0 | \n", "Sołacz | \n", "15 | \n", "411557.0 | \n", "
3 | \n", "False | \n", "4 | \n", "0 | \n", "Sołacz | \n", "14 | \n", "496416.0 | \n", "
4 | \n", "False | \n", "3 | \n", "0 | \n", "Sołacz | \n", "15 | \n", "406032.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
1652 | \n", "True | \n", "2 | \n", "0 | \n", "Grunwald | \n", "51 | \n", "299000.0 | \n", "
1653 | \n", "True | \n", "2 | \n", "2 | \n", "Centrum | \n", "53 | \n", "339000.0 | \n", "
1654 | \n", "True | \n", "3 | \n", "4 | \n", "Stare | \n", "65 | \n", "320000.0 | \n", "
1655 | \n", "True | \n", "3 | \n", "1 | \n", "Nowe | \n", "67 | \n", "364000.0 | \n", "
1656 | \n", "True | \n", "3 | \n", "3 | \n", "Grunwald | \n", "50 | \n", "209000.0 | \n", "
1657 rows × 6 columns
\n", "