2023-10-17 17:35:43 +02:00
|
|
|
|
{
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 316,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"import pandas as pd\n",
|
|
|
|
|
"from statistics import mean,median\n",
|
|
|
|
|
"import re\n",
|
|
|
|
|
"import numpy as np\n",
|
|
|
|
|
" "
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-17 18:14:03 +02:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Wczytanie datasetów"
|
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-17 17:35:43 +02:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 223,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"train_dataset = pd.read_csv(\"./train/train.tsv\", sep = \"\\t\", header=None)"
|
2023-10-17 18:14:03 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"### Data exploration "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 188,
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
" <th>6</th>\n",
|
|
|
|
|
" <th>7</th>\n",
|
|
|
|
|
" <th>8</th>\n",
|
|
|
|
|
" <th>9</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>...</th>\n",
|
2023-10-17 18:14:03 +02:00
|
|
|
|
" <th>16</th>\n",
|
|
|
|
|
" <th>17</th>\n",
|
|
|
|
|
" <th>18</th>\n",
|
|
|
|
|
" <th>19</th>\n",
|
|
|
|
|
" <th>20</th>\n",
|
|
|
|
|
" <th>21</th>\n",
|
|
|
|
|
" <th>22</th>\n",
|
|
|
|
|
" <th>23</th>\n",
|
|
|
|
|
" <th>24</th>\n",
|
|
|
|
|
" <th>25</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>309000.0</td>\n",
|
|
|
|
|
" <td>do zamieszkania</td>\n",
|
|
|
|
|
" <td>390 zł</td>\n",
|
|
|
|
|
" <td>spółdzielcze własnościowe</td>\n",
|
|
|
|
|
" <td>7113</td>\n",
|
|
|
|
|
" <td>https://www.otodom.pl/oferta/niezalezny-uklad-...</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>43.44</td>\n",
|
|
|
|
|
" <td>wtórny</td>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>...</td>\n",
|
2023-10-17 18:14:03 +02:00
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>gazowe</td>\n",
|
|
|
|
|
" <td>plastikowe</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>cegła</td>\n",
|
|
|
|
|
" <td>Polecamy na sprzedaż dwupokojowe mieszkanie p...</td>\n",
|
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
|
" <td>telewizja kablowa, internet, meble, piwnica, g...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"<p>1 rows × 26 columns</p>\n",
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
|
|
|
|
" 0 1 2 3 4 \\\n",
|
|
|
|
|
"0 309000.0 do zamieszkania 390 zł spółdzielcze własnościowe 7113 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" 5 6 7 8 9 \\\n",
|
|
|
|
|
"0 https://www.otodom.pl/oferta/niezalezny-uklad-... 2 NaN 43.44 wtórny \n",
|
|
|
|
|
"\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" ... 16 17 18 19 20 21 22 \\\n",
|
|
|
|
|
"0 ... NaN gazowe plastikowe NaN NaN NaN cegła \n",
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"\n",
|
|
|
|
|
" 23 24 \\\n",
|
|
|
|
|
"0 Polecamy na sprzedaż dwupokojowe mieszkanie p... NaN \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" 25 \n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"0 telewizja kablowa, internet, meble, piwnica, g... \n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[1 rows x 26 columns]"
|
2023-10-17 18:14:03 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 188,
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"train_dataset.head(1)"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Wczytywanie danych testowych i preprocessing jak na treningu"
|
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-17 17:35:43 +02:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 243,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"COLUMN 15:\n",
|
|
|
|
|
"Value counts before changes:\n",
|
|
|
|
|
" 15\n",
|
|
|
|
|
" 1 569\n",
|
|
|
|
|
" 2 527\n",
|
|
|
|
|
" 0 452\n",
|
|
|
|
|
" 4 357\n",
|
|
|
|
|
" 3 321\n",
|
|
|
|
|
" 5 117\n",
|
|
|
|
|
" 6 51\n",
|
|
|
|
|
" 7 42\n",
|
|
|
|
|
" 8 32\n",
|
|
|
|
|
" 10 29\n",
|
|
|
|
|
" 11 24\n",
|
|
|
|
|
" 9 21\n",
|
|
|
|
|
"-1 5\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"Value counts after changes:\n",
|
|
|
|
|
" 15\n",
|
|
|
|
|
" 1 569\n",
|
|
|
|
|
" 2 527\n",
|
|
|
|
|
" 0 452\n",
|
|
|
|
|
" 4 357\n",
|
|
|
|
|
" 3 321\n",
|
|
|
|
|
" 5 117\n",
|
|
|
|
|
" 6 51\n",
|
|
|
|
|
" 7 42\n",
|
|
|
|
|
" 8 32\n",
|
|
|
|
|
" 10 29\n",
|
|
|
|
|
" 11 24\n",
|
|
|
|
|
" 9 21\n",
|
|
|
|
|
"-1 5\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"COLUMN 8:\n",
|
|
|
|
|
"0 43.44\n",
|
|
|
|
|
"1 42.60\n",
|
|
|
|
|
"2 44.30\n",
|
|
|
|
|
"3 88.00\n",
|
|
|
|
|
"4 77.00\n",
|
|
|
|
|
" ... \n",
|
|
|
|
|
"2542 94.00\n",
|
|
|
|
|
"2543 53.50\n",
|
|
|
|
|
"2544 55.25\n",
|
|
|
|
|
"2545 62.00\n",
|
|
|
|
|
"2546 392.00\n",
|
|
|
|
|
"Name: 8, Length: 2547, dtype: float64\n",
|
|
|
|
|
"COLUMN 6:\n",
|
|
|
|
|
"Value counts before changes:\n",
|
|
|
|
|
" 6\n",
|
|
|
|
|
"2 1014\n",
|
|
|
|
|
"3 878\n",
|
|
|
|
|
"4 293\n",
|
|
|
|
|
"1 271\n",
|
|
|
|
|
"5 64\n",
|
|
|
|
|
"6 13\n",
|
|
|
|
|
"7 7\n",
|
|
|
|
|
"10 6\n",
|
|
|
|
|
"9 1\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"Value counts after changes:\n",
|
|
|
|
|
" 6\n",
|
|
|
|
|
"2 1014\n",
|
|
|
|
|
"3 878\n",
|
|
|
|
|
"4 293\n",
|
|
|
|
|
"1 271\n",
|
|
|
|
|
"5 64\n",
|
|
|
|
|
"6 13\n",
|
|
|
|
|
"7 7\n",
|
|
|
|
|
"10 6\n",
|
|
|
|
|
"9 1\n",
|
|
|
|
|
"Name: count, dtype: int64\n"
|
|
|
|
|
]
|
2023-10-17 17:35:43 +02:00
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"\n",
|
|
|
|
|
"# Preprocessing column 15:\n",
|
|
|
|
|
"print(\"COLUMN 15:\")\n",
|
|
|
|
|
"# Count the occurrence of unique values in column before preprocessing:\n",
|
|
|
|
|
"print(\"Value counts before changes:\\n\",train_dataset[15].value_counts())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Replace string to int or NaN:\n",
|
|
|
|
|
"train_dataset[15] = train_dataset[15].replace({\"parter\": 0, \"suterena\": -1, \"> 10\": 11, \"poddasze\": np.nan})\n",
|
|
|
|
|
"train_dataset[15] = train_dataset[15].apply(float)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Fill Nans with median:\n",
|
|
|
|
|
"train_dataset[15] = train_dataset[15].fillna(train_dataset[15].median())  # assign back: inplace fillna on a column selection is chained assignment\n",
|
|
|
|
|
"train_dataset[15]= train_dataset[15].apply(int)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Count the occurrence of unique values in column after preprocessing:\n",
|
|
|
|
|
"print(\"Value counts after changes:\\n\",train_dataset[15].value_counts())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Preprocessing column 8:\n",
|
|
|
|
|
"print(\"COLUMN 8:\")\n",
|
|
|
|
|
"# Replace strings containing space to NaN:\n",
|
|
|
|
|
"train_dataset[8] = train_dataset[8].replace(' ', np.nan, regex=True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Fill Nans with median:\n",
|
|
|
|
|
"train_dataset[8] = train_dataset[8].apply(float)\n",
|
|
|
|
|
"train_dataset[8] = train_dataset[8].fillna(train_dataset[8].median())  # assign back: inplace fillna on a column selection is chained assignment\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(train_dataset[8])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Preprocessing column 6:\n",
|
|
|
|
|
"print(\"COLUMN 6:\")\n",
|
|
|
|
|
"# Count the occurrence of unique values in column before preprocessing:\n",
|
|
|
|
|
"print(\"Value counts before changes:\\n\",train_dataset[6].value_counts())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Change string to 10:\n",
|
|
|
|
|
"train_dataset[6] = train_dataset[6].replace({\"więcej niż 10\": 10})\n",
|
|
|
|
|
"train_dataset[6] = train_dataset[6].apply(int)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Count the occurrence of unique values in column after preprocessing:\n",
|
|
|
|
|
"print(\"Value counts after changes:\\n\",train_dataset[6].value_counts())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"train_dataset[10] = train_dataset[10].fillna(train_dataset[10].median())  # assign back: inplace fillna on a column selection is chained assignment\n",
|
|
|
|
|
"train_dataset[10] = train_dataset[10].apply(float)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"train_dataset = train_dataset[[0,6,8,10,15]]\n",
|
|
|
|
|
"\n"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 275,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"test_dataset = pd.read_csv(\"./dev-0/in.tsv\", sep= \"\\t\", header=None)"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 278,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"COLUMN 15:\n",
|
|
|
|
|
"Value counts before changes:\n",
|
|
|
|
|
" 14\n",
|
|
|
|
|
" 1 108\n",
|
|
|
|
|
" 2 89\n",
|
|
|
|
|
" 0 82\n",
|
|
|
|
|
" 4 65\n",
|
|
|
|
|
" 3 54\n",
|
|
|
|
|
" 5 22\n",
|
|
|
|
|
" 6 12\n",
|
|
|
|
|
" 7 9\n",
|
|
|
|
|
" 11 9\n",
|
|
|
|
|
" 10 5\n",
|
|
|
|
|
" 8 3\n",
|
|
|
|
|
"-1 2\n",
|
|
|
|
|
" 9 2\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"Value counts after changes:\n",
|
|
|
|
|
" 14\n",
|
|
|
|
|
" 1 108\n",
|
|
|
|
|
" 2 89\n",
|
|
|
|
|
" 0 82\n",
|
|
|
|
|
" 4 65\n",
|
|
|
|
|
" 3 54\n",
|
|
|
|
|
" 5 22\n",
|
|
|
|
|
" 6 12\n",
|
|
|
|
|
" 7 9\n",
|
|
|
|
|
" 11 9\n",
|
|
|
|
|
" 10 5\n",
|
|
|
|
|
" 8 3\n",
|
|
|
|
|
"-1 2\n",
|
|
|
|
|
" 9 2\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"COLUMN 8:\n",
|
|
|
|
|
"0 59.10\n",
|
|
|
|
|
"1 38.00\n",
|
|
|
|
|
"2 63.84\n",
|
|
|
|
|
"3 50.00\n",
|
|
|
|
|
"4 65.62\n",
|
|
|
|
|
" ... \n",
|
|
|
|
|
"457 72.78\n",
|
|
|
|
|
"458 51.23\n",
|
|
|
|
|
"459 54.16\n",
|
|
|
|
|
"460 90.10\n",
|
|
|
|
|
"461 71.90\n",
|
|
|
|
|
"Name: 7, Length: 462, dtype: float64\n",
|
|
|
|
|
"COLUMN 6:\n",
|
|
|
|
|
"Value counts before changes:\n",
|
|
|
|
|
" 5\n",
|
|
|
|
|
"2 196\n",
|
|
|
|
|
"3 152\n",
|
|
|
|
|
"1 51\n",
|
|
|
|
|
"4 50\n",
|
|
|
|
|
"5 9\n",
|
|
|
|
|
"6 4\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"Value counts after changes:\n",
|
|
|
|
|
" 5\n",
|
|
|
|
|
"2 196\n",
|
|
|
|
|
"3 152\n",
|
|
|
|
|
"1 51\n",
|
|
|
|
|
"4 50\n",
|
|
|
|
|
"5 9\n",
|
|
|
|
|
"6 4\n",
|
|
|
|
|
"Name: count, dtype: int64\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"\n",
|
|
|
|
|
"# Preprocessing column 14 (test counterpart of train column 15):\n",
|
|
|
|
|
"print(\"COLUMN 15:\")\n",
|
|
|
|
|
"# Count the occurrence of unique values in column before preprocessing:\n",
|
|
|
|
|
"print(\"Value counts before changes:\\n\",test_dataset[14].value_counts())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Replace string to int or NaN:\n",
|
|
|
|
|
"test_dataset[14] = test_dataset[14].replace({\"parter\": 0, \"suterena\": -1, \"> 10\": 11, \"poddasze\": np.nan})\n",
|
|
|
|
|
"test_dataset[14] = test_dataset[14].apply(float)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Fill Nans with median:\n",
|
|
|
|
|
"test_dataset[14] = test_dataset[14].fillna(test_dataset[14].median())  # assign back: inplace fillna on a column selection is chained assignment\n",
|
|
|
|
|
"test_dataset[14]= test_dataset[14].apply(int)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Count the occurrence of unique values in column after preprocessing:\n",
|
|
|
|
|
"print(\"Value counts after changes:\\n\",test_dataset[14].value_counts())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Preprocessing column 7 (test counterpart of train column 8):\n",
|
|
|
|
|
"print(\"COLUMN 8:\")\n",
|
|
|
|
|
"# Replace strings containing space to NaN:\n",
|
|
|
|
|
"test_dataset[7] = test_dataset[7].replace(' ', np.nan, regex=True)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Fill Nans with median:\n",
|
|
|
|
|
"test_dataset[7] = test_dataset[7].apply(float)\n",
|
|
|
|
|
"test_dataset[7] = test_dataset[7].fillna(test_dataset[7].median())  # assign back: inplace fillna on a column selection is chained assignment\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"print(test_dataset[7])\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Preprocessing column 5 (test counterpart of train column 6):\n",
|
|
|
|
|
"print(\"COLUMN 6:\")\n",
|
|
|
|
|
"# Count the occurrence of unique values in column before preprocessing:\n",
|
|
|
|
|
"print(\"Value counts before changes:\\n\",test_dataset[5].value_counts())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Change string to 10:\n",
|
|
|
|
|
"test_dataset[5] = test_dataset[5].replace({\"więcej niż 10\": 10})\n",
|
|
|
|
|
"test_dataset[5] = test_dataset[5].apply(int)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Count the occurrence of unique values in column after preprocessing:\n",
|
|
|
|
|
"print(\"Value counts after changes:\\n\",test_dataset[5].value_counts())\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"test_dataset[9] = test_dataset[9].fillna(test_dataset[9].median())  # assign back: inplace fillna on a column selection is chained assignment\n",
|
|
|
|
|
"test_dataset[9] = test_dataset[9].apply(float)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"test_dataset = test_dataset[[5,7,9,14]]\n",
|
|
|
|
|
"\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 305,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>5</th>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" <th>7</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>9</th>\n",
|
|
|
|
|
" <th>14</th>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>59.1</td>\n",
|
|
|
|
|
" <td>4.0</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" 5 7 9 14\n",
|
|
|
|
|
"0 3 59.1 4.0 2"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 305,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"test_dataset.head(1)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Model"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 234,
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"metadata": {},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"outputs": [],
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|
|
|
|
"from sklearn.preprocessing import StandardScaler"
|
2023-10-17 18:14:03 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 291,
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"metadata": {},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"outputs": [],
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"X_train = train_dataset.drop(0,axis=1)\n",
|
|
|
|
|
"y_train = train_dataset[[0]]\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"scaler = StandardScaler()\n",
|
|
|
|
|
"trans_data = scaler.fit_transform(X_train)  # fit on X_train; the name X is not defined by any cell above"
|
2023-10-17 18:14:03 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 292,
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"X_test = test_dataset"
|
2023-10-17 18:14:03 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 293,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"outputs": [],
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"reg = LinearRegression()"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 294,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"outputs": [],
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"reg.fit(X_train, y_train)\n",
|
|
|
|
|
"results = reg.predict(X_test)"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-17 18:14:03 +02:00
|
|
|
|
{
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 265,
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"metadata": {},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"outputs": [],
|
2023-10-17 18:14:03 +02:00
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"import pickle\n",
|
|
|
|
|
"from sklearn.metrics import r2_score\n",
|
|
|
|
|
"# pickle.dump(reg, open(\"model.pkl\", \"wb\"))"
|
2023-10-17 18:14:03 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-17 17:35:43 +02:00
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 295,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
|
|
|
|
"array([[ 394901.20434554],\n",
|
|
|
|
|
" [ 293271.41755997],\n",
|
|
|
|
|
" [ 432666.21541136],\n",
|
|
|
|
|
" [ 295330.00173591],\n",
|
|
|
|
|
" [ 444001.60173013],\n",
|
|
|
|
|
" [ 595102.64364947],\n",
|
|
|
|
|
" [ 306562.53056792],\n",
|
|
|
|
|
" [ 346367.98664224],\n",
|
|
|
|
|
" [ 320708.28590668],\n",
|
|
|
|
|
" [ 355678.48101873],\n",
|
|
|
|
|
" [ 272087.02326267],\n",
|
|
|
|
|
" [ 315111.17058773],\n",
|
|
|
|
|
" [ 408743.59976314],\n",
|
|
|
|
|
" [ 392452.80148004],\n",
|
|
|
|
|
" [ 351776.80580328],\n",
|
|
|
|
|
" [ 325340.80253875],\n",
|
|
|
|
|
" [ 324415.74401286],\n",
|
|
|
|
|
" [ 286605.64798109],\n",
|
|
|
|
|
" [ 354409.52248178],\n",
|
|
|
|
|
" [ 273703.59297418],\n",
|
|
|
|
|
" [ 343402.57630023],\n",
|
|
|
|
|
" [ 285271.79788568],\n",
|
|
|
|
|
" [ 370401.6837088 ],\n",
|
|
|
|
|
" [ 482522.18182752],\n",
|
|
|
|
|
" [ 394119.81457774],\n",
|
|
|
|
|
" [ 474686.42204592],\n",
|
|
|
|
|
" [ 627962.51992074],\n",
|
|
|
|
|
" [ 334221.5006669 ],\n",
|
|
|
|
|
" [ 397338.23464708],\n",
|
|
|
|
|
" [ 414336.53457982],\n",
|
|
|
|
|
" [ 335485.00759125],\n",
|
|
|
|
|
" [ 351363.03195993],\n",
|
|
|
|
|
" [ 384755.53002533],\n",
|
|
|
|
|
" [ 374013.34228146],\n",
|
|
|
|
|
" [ 384755.53002533],\n",
|
|
|
|
|
" [ 355882.69461662],\n",
|
|
|
|
|
" [ 521392.20674093],\n",
|
|
|
|
|
" [ 425546.58946533],\n",
|
|
|
|
|
" [ 294138.97474644],\n",
|
|
|
|
|
" [ 539668.46177031],\n",
|
|
|
|
|
" [ 340107.2565533 ],\n",
|
|
|
|
|
" [ 467566.06735236],\n",
|
|
|
|
|
" [ 228315.74093461],\n",
|
|
|
|
|
" [ 373516.14139746],\n",
|
|
|
|
|
" [ 572962.98245529],\n",
|
|
|
|
|
" [ 425442.35590324],\n",
|
|
|
|
|
" [ 264688.02449027],\n",
|
|
|
|
|
" [ 321384.60839985],\n",
|
|
|
|
|
" [ 313697.70958017],\n",
|
|
|
|
|
" [ 257339.46910406],\n",
|
|
|
|
|
" [ 285491.92892354],\n",
|
|
|
|
|
" [ 265315.27967261],\n",
|
|
|
|
|
" [ 269349.61545595],\n",
|
|
|
|
|
" [ 370207.10222578],\n",
|
|
|
|
|
" [ 505152.41437514],\n",
|
|
|
|
|
" [ 326640.0334956 ],\n",
|
|
|
|
|
" [ 361868.73382815],\n",
|
|
|
|
|
" [ 641520.72645455],\n",
|
|
|
|
|
" [ 513506.74409331],\n",
|
|
|
|
|
" [ 225524.54295198],\n",
|
|
|
|
|
" [ 237226.80467502],\n",
|
|
|
|
|
" [ 453176.39203834],\n",
|
|
|
|
|
" [ 261995.60845714],\n",
|
|
|
|
|
" [ 955187.2509814 ],\n",
|
|
|
|
|
" [ 492991.37526251],\n",
|
|
|
|
|
" [ 374938.40080734],\n",
|
|
|
|
|
" [ 774416.69022809],\n",
|
|
|
|
|
" [ 523696.94084834],\n",
|
|
|
|
|
" [ 434831.26310559],\n",
|
|
|
|
|
" [ 489623.47044873],\n",
|
|
|
|
|
" [ 280423.33071801],\n",
|
|
|
|
|
" [ 264688.02449027],\n",
|
|
|
|
|
" [ 280814.31555288],\n",
|
|
|
|
|
" [ 359688.46533354],\n",
|
|
|
|
|
" [ 314210.82790415],\n",
|
|
|
|
|
" [ 622185.54247246],\n",
|
|
|
|
|
" [ 448643.38610329],\n",
|
|
|
|
|
" [ 561068.71341085],\n",
|
|
|
|
|
" [ 331068.80586855],\n",
|
|
|
|
|
" [ 387502.56994691],\n",
|
|
|
|
|
" [ 251024.38630808],\n",
|
|
|
|
|
" [ 295949.29222845],\n",
|
|
|
|
|
" [ 311115.63444521],\n",
|
|
|
|
|
" [ 307926.65308324],\n",
|
|
|
|
|
" [ 268121.19700892],\n",
|
|
|
|
|
" [ 329516.02282157],\n",
|
|
|
|
|
" [ 260145.38644037],\n",
|
|
|
|
|
" [ 402080.9057926 ],\n",
|
|
|
|
|
" [ 247029.12134696],\n",
|
|
|
|
|
" [ 293241.9788526 ],\n",
|
|
|
|
|
" [ 968741.83954407],\n",
|
|
|
|
|
" [ 463714.83552309],\n",
|
|
|
|
|
" [ 458749.5202625 ],\n",
|
|
|
|
|
" [ 467566.06735236],\n",
|
|
|
|
|
" [ 251228.59990596],\n",
|
|
|
|
|
" [ 367839.0507926 ],\n",
|
|
|
|
|
" [ 178563.73689914],\n",
|
|
|
|
|
" [ 404748.33480203],\n",
|
|
|
|
|
" [ 361629.76401877],\n",
|
|
|
|
|
" [ 335003.99532863],\n",
|
|
|
|
|
" [ 468590.27220151],\n",
|
|
|
|
|
" [ 342562.21592532],\n",
|
|
|
|
|
" [ 371236.86365252],\n",
|
|
|
|
|
" [ 310876.80170877],\n",
|
|
|
|
|
" [ 276928.20517629],\n",
|
|
|
|
|
" [ 273106.88139315],\n",
|
|
|
|
|
" [ 517720.942006 ],\n",
|
|
|
|
|
" [ 309671.80782539],\n",
|
|
|
|
|
" [ 321349.71807989],\n",
|
|
|
|
|
" [ 346542.76153275],\n",
|
|
|
|
|
" [ 374434.93476654],\n",
|
|
|
|
|
" [ 919566.5541124 ],\n",
|
|
|
|
|
" [ 295586.59732644],\n",
|
|
|
|
|
" [ 394954.99452145],\n",
|
|
|
|
|
" [ 259707.15616347],\n",
|
|
|
|
|
" [ 296950.34369535],\n",
|
|
|
|
|
" [ 390519.03008681],\n",
|
|
|
|
|
" [ 338032.92115378],\n",
|
|
|
|
|
" [ 279645.24385936],\n",
|
|
|
|
|
" [ 394134.53393142],\n",
|
|
|
|
|
" [ 279107.13851167],\n",
|
|
|
|
|
" [ 454032.21228709],\n",
|
|
|
|
|
" [ 959467.06920008],\n",
|
|
|
|
|
" [ 683669.88249351],\n",
|
|
|
|
|
" [ 371046.82704552],\n",
|
|
|
|
|
" [ 576981.26372572],\n",
|
|
|
|
|
" [ 294170.56215725],\n",
|
|
|
|
|
" [ 443250.48438223],\n",
|
|
|
|
|
" [ 283176.45780734],\n",
|
|
|
|
|
" [ 434794.43417916],\n",
|
|
|
|
|
" [ 344911.47246756],\n",
|
|
|
|
|
" [ 272235.14370441],\n",
|
|
|
|
|
" [ 824501.58608444],\n",
|
|
|
|
|
" [ 334396.91111258],\n",
|
|
|
|
|
" [ 350120.43652201],\n",
|
|
|
|
|
" [ 253527.41306202],\n",
|
|
|
|
|
" [ 293854.69267028],\n",
|
|
|
|
|
" [ 291547.17430427],\n",
|
|
|
|
|
" [ 270927.11434524],\n",
|
|
|
|
|
" [ 383959.89024261],\n",
|
|
|
|
|
" [ 656199.3017448 ],\n",
|
|
|
|
|
" [ 553341.638875 ],\n",
|
|
|
|
|
" [ 432687.22009017],\n",
|
|
|
|
|
" [ 526139.69394388],\n",
|
|
|
|
|
" [ 447026.81639178],\n",
|
|
|
|
|
" [ 274087.19936264],\n",
|
|
|
|
|
" [ 397721.29867274],\n",
|
|
|
|
|
" [ 491976.34496209],\n",
|
|
|
|
|
" [ 334112.72222878],\n",
|
|
|
|
|
" [ 564079.20871881],\n",
|
|
|
|
|
" [ 262283.77968584],\n",
|
|
|
|
|
" [ 325888.17562753],\n",
|
|
|
|
|
" [ 707612.5028916 ],\n",
|
|
|
|
|
" [ 392111.24832978],\n",
|
|
|
|
|
" [ 498668.84201377],\n",
|
|
|
|
|
" [ 287777.33771682],\n",
|
|
|
|
|
" [ 269508.93047261],\n",
|
|
|
|
|
" [ 438284.33540911],\n",
|
|
|
|
|
" [ 325678.41722469],\n",
|
|
|
|
|
" [ 451638.79768381],\n",
|
|
|
|
|
" [ 313355.0076554 ],\n",
|
|
|
|
|
" [ 507395.87489522],\n",
|
|
|
|
|
" [ 319663.80512625],\n",
|
|
|
|
|
" [ 334586.40535677],\n",
|
|
|
|
|
" [ 336983.25994216],\n",
|
|
|
|
|
" [ 241704.55116646],\n",
|
|
|
|
|
" [ 551645.5514437 ],\n",
|
|
|
|
|
" [ 263184.95608194],\n",
|
|
|
|
|
" [ 353876.49260028],\n",
|
|
|
|
|
" [ 261995.60845714],\n",
|
|
|
|
|
" [ 310876.80170877],\n",
|
|
|
|
|
" [ 350329.83055109],\n",
|
|
|
|
|
" [ 322549.60455611],\n",
|
|
|
|
|
" [ 394105.09522405],\n",
|
|
|
|
|
" [ 553247.67298293],\n",
|
|
|
|
|
" [ 318793.17233144],\n",
|
|
|
|
|
" [ 467137.74037172],\n",
|
|
|
|
|
" [ 242785.12035314],\n",
|
|
|
|
|
" [ 382352.68146455],\n",
|
|
|
|
|
" [ 327735.05085456],\n",
|
|
|
|
|
" [ 517343.1516039 ],\n",
|
|
|
|
|
" [ 290640.3481382 ],\n",
|
|
|
|
|
" [ 219340.07698546],\n",
|
|
|
|
|
" [ 298372.97925164],\n",
|
|
|
|
|
" [ 194674.57986053],\n",
|
|
|
|
|
" [ 273246.9321801 ],\n",
|
|
|
|
|
" [ 332307.2208042 ],\n",
|
|
|
|
|
" [ 264140.01584633],\n",
|
|
|
|
|
" [ 457026.61216604],\n",
|
|
|
|
|
" [ 395940.59788713],\n",
|
|
|
|
|
" [ 290989.89791922],\n",
|
|
|
|
|
" [ 473556.78554839],\n",
|
|
|
|
|
" [ 318678.94228073],\n",
|
|
|
|
|
" [ 485526.48500272],\n",
|
|
|
|
|
" [ 325262.02533913],\n",
|
|
|
|
|
" [ 481691.81794123],\n",
|
|
|
|
|
" [ 339771.01794269],\n",
|
|
|
|
|
" [ 367305.38535593],\n",
|
|
|
|
|
" [ 443173.11536588],\n",
|
|
|
|
|
" [ 505575.09158582],\n",
|
|
|
|
|
" [ 318678.94228073],\n",
|
|
|
|
|
" [ 216558.78229908],\n",
|
|
|
|
|
" [ 233490.17904287],\n",
|
|
|
|
|
" [ 394075.65651668],\n",
|
|
|
|
|
" [ 295760.63169678],\n",
|
|
|
|
|
" [ 258795.98327875],\n",
|
|
|
|
|
" [ 346353.26728856],\n",
|
|
|
|
|
" [ 305592.11589469],\n",
|
|
|
|
|
" [ 443578.56014568],\n",
|
|
|
|
|
" [ 300577.82651562],\n",
|
|
|
|
|
" [ 402095.62514628],\n",
|
|
|
|
|
" [ 513536.18280069],\n",
|
|
|
|
|
" [ 757593.69577602],\n",
|
|
|
|
|
" [ 287777.33771682],\n",
|
|
|
|
|
" [ 397881.4272336 ],\n",
|
|
|
|
|
" [ 414604.91098864],\n",
|
|
|
|
|
" [ 349552.42312819],\n",
|
|
|
|
|
" [ 534205.58125196],\n",
|
|
|
|
|
" [ 378854.88856884],\n",
|
|
|
|
|
" [ 348203.48930532],\n",
|
|
|
|
|
" [ 333984.13719815],\n",
|
|
|
|
|
" [ 330351.10957292],\n",
|
|
|
|
|
" [ 182753.04098048],\n",
|
|
|
|
|
" [ 329142.59091077],\n",
|
|
|
|
|
" [ 290053.63304579],\n",
|
|
|
|
|
" [ 309309.5704895 ],\n",
|
|
|
|
|
" [ 481568.04896803],\n",
|
|
|
|
|
" [ 270985.99175998],\n",
|
|
|
|
|
" [ 327735.05085456],\n",
|
|
|
|
|
" [ 237495.07611884],\n",
|
|
|
|
|
" [ 295905.1341674 ],\n",
|
|
|
|
|
" [ 267893.10128127],\n",
|
|
|
|
|
" [ 305518.51912627],\n",
|
|
|
|
|
" [ 289640.4947576 ],\n",
|
|
|
|
|
" [ 448016.13092096],\n",
|
|
|
|
|
" [ 448276.23951762],\n",
|
|
|
|
|
" [ 306562.53056792],\n",
|
|
|
|
|
" [ 763649.68167691],\n",
|
|
|
|
|
" [ 302348.80199401],\n",
|
|
|
|
|
" [ 316359.12444587],\n",
|
|
|
|
|
" [ 290899.54999829],\n",
|
|
|
|
|
" [ 231743.82621443],\n",
|
|
|
|
|
" [ 313404.81548641],\n",
|
|
|
|
|
" [ 278816.39312135],\n",
|
|
|
|
|
" [ 383639.70614492],\n",
|
|
|
|
|
" [ 300989.76671752],\n",
|
|
|
|
|
" [ 320213.59809987],\n",
|
|
|
|
|
" [ 327790.31029817],\n",
|
|
|
|
|
" [ 365619.94174106],\n",
|
|
|
|
|
" [ 405226.27145632],\n",
|
|
|
|
|
" [ 351115.20266379],\n",
|
|
|
|
|
" [ 301726.6340505 ],\n",
|
|
|
|
|
" [ 239057.85565444],\n",
|
|
|
|
|
" [ 623907.54383236],\n",
|
|
|
|
|
" [ 325824.388963 ],\n",
|
|
|
|
|
" [ 305965.91217926],\n",
|
|
|
|
|
" [ 349984.19009093],\n",
|
|
|
|
|
" [ 638635.09324109],\n",
|
|
|
|
|
" [ 624904.92801633],\n",
|
|
|
|
|
" [ 421648.73037838],\n",
|
|
|
|
|
" [ 290610.1806833 ],\n",
|
|
|
|
|
" [ 288601.57055477],\n",
|
|
|
|
|
" [ 504376.11184616],\n",
|
|
|
|
|
" [ 246333.26344267],\n",
|
|
|
|
|
" [ 390474.87202575],\n",
|
|
|
|
|
" [ 676486.10550918],\n",
|
|
|
|
|
" [ 217145.59058386],\n",
|
|
|
|
|
" [ 253652.38012152],\n",
|
|
|
|
|
" [ 874149.26336545],\n",
|
|
|
|
|
" [ 241282.41631858],\n",
|
|
|
|
|
" [ 300762.91295673],\n",
|
|
|
|
|
" [ 364474.75217731],\n",
|
|
|
|
|
" [ 819789.82291398],\n",
|
|
|
|
|
" [ 227915.8527324 ],\n",
|
|
|
|
|
" [ 333171.00574275],\n",
|
|
|
|
|
" [ 406022.1093964 ],\n",
|
|
|
|
|
" [ 424620.79041928],\n",
|
|
|
|
|
" [ 433925.27065205],\n",
|
|
|
|
|
" [ 369693.34834663],\n",
|
|
|
|
|
" [ 659209.432679 ],\n",
|
|
|
|
|
" [ 377446.15042634],\n",
|
|
|
|
|
" [ 489887.75954764],\n",
|
|
|
|
|
" [ 215155.31778014],\n",
|
|
|
|
|
" [ 369953.55013564],\n",
|
|
|
|
|
" [ 541589.4962969 ],\n",
|
|
|
|
|
" [ 270941.83369892],\n",
|
|
|
|
|
" [ 342407.81015845],\n",
|
|
|
|
|
" [ 259234.77608678],\n",
|
|
|
|
|
" [ 433939.99000574],\n",
|
|
|
|
|
" [ 277310.70667081],\n",
|
|
|
|
|
" [ 675738.22998601],\n",
|
|
|
|
|
" [ 376222.07869798],\n",
|
|
|
|
|
" [ 381358.95913228],\n",
|
|
|
|
|
" [ 366112.32656763],\n",
|
|
|
|
|
" [ 311543.39889473],\n",
|
|
|
|
|
" [ 236978.24663136],\n",
|
|
|
|
|
" [ 321370.9939401 ],\n",
|
|
|
|
|
" [ 412013.97636694],\n",
|
|
|
|
|
" [ 321355.16969248],\n",
|
|
|
|
|
" [ 318664.22292705],\n",
|
|
|
|
|
" [ 382338.23329227],\n",
|
|
|
|
|
" [ 581803.00342058],\n",
|
|
|
|
|
" [ 513506.74409331],\n",
|
|
|
|
|
" [ 635007.71538581],\n",
|
|
|
|
|
" [ 270912.39499155],\n",
|
|
|
|
|
" [ 316897.22979356],\n",
|
|
|
|
|
" [ 352344.72600475],\n",
|
|
|
|
|
" [ 256796.27651754],\n",
|
|
|
|
|
" [ 453674.42663487],\n",
|
|
|
|
|
" [ 321897.63353148],\n",
|
|
|
|
|
" [ 307478.51951008],\n",
|
|
|
|
|
" [ 268956.93948377],\n",
|
|
|
|
|
" [ 557884.64129867],\n",
|
|
|
|
|
" [1047884.98951607],\n",
|
|
|
|
|
" [ 440857.06977894],\n",
|
|
|
|
|
" [ 226951.61841929],\n",
|
|
|
|
|
" [ 350348.53224967],\n",
|
|
|
|
|
" [ 443134.68009882],\n",
|
|
|
|
|
" [ 277414.30467774],\n",
|
|
|
|
|
" [ 476949.14679571],\n",
|
|
|
|
|
" [ 286093.7277434 ],\n",
|
|
|
|
|
" [ 585186.00373445],\n",
|
|
|
|
|
" [ 441842.20380586],\n",
|
|
|
|
|
" [ 265771.94046669],\n",
|
|
|
|
|
" [ 318649.50357336],\n",
|
|
|
|
|
" [ 252423.12796196],\n",
|
|
|
|
|
" [ 321470.14026336],\n",
|
|
|
|
|
" [ 316360.32253217],\n",
|
|
|
|
|
" [ 318678.94228073],\n",
|
|
|
|
|
" [ 249636.38166299],\n",
|
|
|
|
|
" [ 342591.65463269],\n",
|
|
|
|
|
" [ 255418.53954247],\n",
|
|
|
|
|
" [ 911134.08274977],\n",
|
|
|
|
|
" [ 427920.19747611],\n",
|
|
|
|
|
" [ 528632.81740156],\n",
|
|
|
|
|
" [1170092.55572165],\n",
|
|
|
|
|
" [ 394119.81457774],\n",
|
|
|
|
|
" [ 306503.65315318],\n",
|
|
|
|
|
" [ 312892.06153619],\n",
|
|
|
|
|
" [ 233843.14863766],\n",
|
|
|
|
|
" [ 490663.22836409],\n",
|
|
|
|
|
" [ 459535.36171397],\n",
|
|
|
|
|
" [ 333969.41784446],\n",
|
|
|
|
|
" [ 282679.25692334],\n",
|
|
|
|
|
" [ 226111.82057552],\n",
|
|
|
|
|
" [ 538215.56556676],\n",
|
|
|
|
|
" [ 226111.82057552],\n",
|
|
|
|
|
" [ 352457.85116153],\n",
|
|
|
|
|
" [-161201.02176254],\n",
|
|
|
|
|
" [ 302303.81022042],\n",
|
|
|
|
|
" [ 323007.37024413],\n",
|
|
|
|
|
" [ 370455.47388471],\n",
|
|
|
|
|
" [ 287524.71253158],\n",
|
|
|
|
|
" [1060592.82741372],\n",
|
|
|
|
|
" [ 248909.9801574 ],\n",
|
|
|
|
|
" [ 288108.35201566],\n",
|
|
|
|
|
" [ 287812.79056791],\n",
|
|
|
|
|
" [ 229678.75855599],\n",
|
|
|
|
|
" [ 249386.81191776],\n",
|
|
|
|
|
" [ 390314.04386082],\n",
|
|
|
|
|
" [ 451537.07719892],\n",
|
|
|
|
|
" [ 528632.81740156],\n",
|
|
|
|
|
" [ 811858.17040649],\n",
|
|
|
|
|
" [ 302786.19855838],\n",
|
|
|
|
|
" [ 394915.92369922],\n",
|
|
|
|
|
" [ 390314.04386082],\n",
|
|
|
|
|
" [ 405425.39781537],\n",
|
|
|
|
|
" [1289225.09943457],\n",
|
|
|
|
|
" [ 349637.21447153],\n",
|
|
|
|
|
" [ 309215.24022367],\n",
|
|
|
|
|
" [ 347266.58887672],\n",
|
|
|
|
|
" [ 295166.06881919],\n",
|
|
|
|
|
" [ 301125.47078581],\n",
|
|
|
|
|
" [ 738397.19827818],\n",
|
|
|
|
|
" [ 494697.19977366],\n",
|
|
|
|
|
" [ 305130.09668038],\n",
|
|
|
|
|
" [ 278844.04749905],\n",
|
|
|
|
|
" [ 302727.32114364],\n",
|
|
|
|
|
" [ 310444.76356463],\n",
|
|
|
|
|
" [ 430134.21928882],\n",
|
|
|
|
|
" [ 309671.80782539],\n",
|
|
|
|
|
" [ 721669.74395088],\n",
|
|
|
|
|
" [ 597337.30576723],\n",
|
|
|
|
|
" [ 351846.05585305],\n",
|
|
|
|
|
" [ 704587.65260372],\n",
|
|
|
|
|
" [ 584703.8865779 ],\n",
|
|
|
|
|
" [ 293576.41296521],\n",
|
|
|
|
|
" [ 252313.71396867],\n",
|
|
|
|
|
" [ 718524.74266092],\n",
|
|
|
|
|
" [ 253189.43400231],\n",
|
|
|
|
|
" [ 302168.8466723 ],\n",
|
|
|
|
|
" [ 288735.60719799],\n",
|
|
|
|
|
" [ 498483.16389807],\n",
|
|
|
|
|
" [ 457172.49071198],\n",
|
|
|
|
|
" [ 333178.1247804 ],\n",
|
|
|
|
|
" [ 704558.21389635],\n",
|
|
|
|
|
" [ 468988.05558085],\n",
|
|
|
|
|
" [ 417037.85398029],\n",
|
|
|
|
|
" [ 245048.7518395 ],\n",
|
|
|
|
|
" [ 423113.73966605],\n",
|
|
|
|
|
" [ 354931.43920809],\n",
|
|
|
|
|
" [ 720067.89359305],\n",
|
|
|
|
|
" [ 749936.43382107],\n",
|
|
|
|
|
" [ 551719.98192466],\n",
|
|
|
|
|
" [ 441294.46634331],\n",
|
|
|
|
|
" [ 556690.74879784],\n",
|
|
|
|
|
" [ 592416.24176006],\n",
|
|
|
|
|
" [ 422202.56678133],\n",
|
|
|
|
|
" [ 318693.39045302],\n",
|
|
|
|
|
" [ 403430.30895423],\n",
|
|
|
|
|
" [ 604406.85270084],\n",
|
|
|
|
|
" [ 255244.59836449],\n",
|
|
|
|
|
" [ 374597.01387348],\n",
|
|
|
|
|
" [ 308150.766466 ],\n",
|
|
|
|
|
" [ 604268.00000018],\n",
|
|
|
|
|
" [ 616438.10869654],\n",
|
|
|
|
|
" [ 993142.48503895],\n",
|
|
|
|
|
" [ 395901.52706491],\n",
|
|
|
|
|
" [ 434344.3298916 ],\n",
|
|
|
|
|
" [ 485469.25484471],\n",
|
|
|
|
|
" [ 627355.43570469],\n",
|
|
|
|
|
" [ 310876.80170877],\n",
|
|
|
|
|
" [ 174618.27976904],\n",
|
|
|
|
|
" [ 447061.07115658],\n",
|
|
|
|
|
" [ 335585.89436361],\n",
|
|
|
|
|
" [ 327421.4232634 ],\n",
|
|
|
|
|
" [ 373803.94825239],\n",
|
|
|
|
|
" [ 373789.2288987 ],\n",
|
|
|
|
|
" [ 348443.52012806],\n",
|
|
|
|
|
" [ 372500.73495064],\n",
|
|
|
|
|
" [ 556630.4021154 ],\n",
|
|
|
|
|
" [ 553112.80262718],\n",
|
|
|
|
|
" [ 400867.67603802],\n",
|
|
|
|
|
" [ 569468.03490261],\n",
|
|
|
|
|
" [ 444493.62218294],\n",
|
|
|
|
|
" [ 514644.81461941],\n",
|
|
|
|
|
" [ 702403.53883714],\n",
|
|
|
|
|
" [ 608248.81678902],\n",
|
|
|
|
|
" [1162131.46450679],\n",
|
|
|
|
|
" [ 314668.1242534 ],\n",
|
|
|
|
|
" [ 344572.38851392],\n",
|
|
|
|
|
" [ 641419.64152483],\n",
|
|
|
|
|
" [ 406890.71039238],\n",
|
|
|
|
|
" [ 330256.77930708],\n",
|
|
|
|
|
" [ 406762.86588192],\n",
|
|
|
|
|
" [ 453663.96080749],\n",
|
|
|
|
|
" [1656917.6302292 ],\n",
|
|
|
|
|
" [ 449408.85843111],\n",
|
|
|
|
|
" [ 258707.30278287],\n",
|
|
|
|
|
" [ 345745.18314357],\n",
|
|
|
|
|
" [ 310109.76692089],\n",
|
|
|
|
|
" [ 424505.72665604],\n",
|
|
|
|
|
" [ 310095.0475672 ],\n",
|
|
|
|
|
" [ 401125.77300418],\n",
|
|
|
|
|
" [ 457988.13524458],\n",
|
|
|
|
|
" [ 550575.62607344],\n",
|
|
|
|
|
" [ 523303.70234502],\n",
|
|
|
|
|
" [ 506600.1301475 ],\n",
|
|
|
|
|
" [ 379014.11039314],\n",
|
|
|
|
|
" [ 352767.4964078 ],\n",
|
|
|
|
|
" [ 644515.6686963 ],\n",
|
|
|
|
|
" [ 537987.4698391 ]])"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 295,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"results"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 301,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>373000.00</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>299000.00</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>365000.00</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>369000.00</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>483791.00</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>457</th>\n",
|
|
|
|
|
" <td>655544.02</td>\n",
|
2023-10-17 18:14:03 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>458</th>\n",
|
|
|
|
|
" <td>471397.97</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>459</th>\n",
|
|
|
|
|
" <td>309958.00</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>460</th>\n",
|
|
|
|
|
" <td>699000.00</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>461</th>\n",
|
|
|
|
|
" <td>850000.00</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"<p>462 rows × 1 columns</p>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" 0\n",
|
|
|
|
|
"0 373000.00\n",
|
|
|
|
|
"1 299000.00\n",
|
|
|
|
|
"2 365000.00\n",
|
|
|
|
|
"3 369000.00\n",
|
|
|
|
|
"4 483791.00\n",
|
|
|
|
|
".. ...\n",
|
|
|
|
|
"457 655544.02\n",
|
|
|
|
|
"458 471397.97\n",
|
|
|
|
|
"459 309958.00\n",
|
|
|
|
|
"460 699000.00\n",
|
|
|
|
|
"461 850000.00\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"[462 rows x 1 columns]"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 301,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"y_test = pd.read_csv(\"./dev-0/expected.tsv\", header=None)\n",
|
|
|
|
|
"y_test"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 302,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"0.6393762535622007"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 302,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"r2_score(y_test, results)"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 303,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/plain": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"71559.96181964973"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 303,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"mean_absolute_error(y_test, results)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"## Predykcja dla zbioru testowego"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 317,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"final_test_dataset = pd.read_csv(\"./test-A/in.tsv\", sep= \"\\t\", header=None)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 318,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"COLUMN 15:\n",
|
|
|
|
|
"Value counts before changes:\n",
|
|
|
|
|
" 14\n",
|
|
|
|
|
"1 92\n",
|
|
|
|
|
"parter 70\n",
|
|
|
|
|
"3 68\n",
|
|
|
|
|
"4 64\n",
|
|
|
|
|
"2 61\n",
|
|
|
|
|
"5 15\n",
|
|
|
|
|
"6 11\n",
|
|
|
|
|
"7 7\n",
|
|
|
|
|
"10 5\n",
|
|
|
|
|
"> 10 5\n",
|
|
|
|
|
"9 4\n",
|
|
|
|
|
"8 2\n",
|
|
|
|
|
"suterena 1\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"Value counts after changes:\n",
|
|
|
|
|
" 14\n",
|
|
|
|
|
" 1 92\n",
|
|
|
|
|
" 2 74\n",
|
|
|
|
|
" 0 70\n",
|
|
|
|
|
" 3 68\n",
|
|
|
|
|
" 4 64\n",
|
|
|
|
|
" 5 15\n",
|
|
|
|
|
" 6 11\n",
|
|
|
|
|
" 7 7\n",
|
|
|
|
|
" 10 5\n",
|
|
|
|
|
" 11 5\n",
|
|
|
|
|
" 9 4\n",
|
|
|
|
|
" 8 2\n",
|
|
|
|
|
"-1 1\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"COLUMN 8:\n",
|
|
|
|
|
"0 61.99\n",
|
|
|
|
|
"1 64.00\n",
|
|
|
|
|
"2 51.15\n",
|
|
|
|
|
"3 45.77\n",
|
|
|
|
|
"4 44.36\n",
|
|
|
|
|
" ... \n",
|
|
|
|
|
"413 34.97\n",
|
|
|
|
|
"414 49.06\n",
|
|
|
|
|
"415 76.71\n",
|
|
|
|
|
"416 72.63\n",
|
|
|
|
|
"417 65.84\n",
|
|
|
|
|
"Name: 7, Length: 418, dtype: float64\n",
|
|
|
|
|
"COLUMN 6:\n",
|
|
|
|
|
"Value counts before changes:\n",
|
|
|
|
|
" 5\n",
|
|
|
|
|
"2 175\n",
|
|
|
|
|
"3 143\n",
|
|
|
|
|
"4 50\n",
|
|
|
|
|
"1 40\n",
|
|
|
|
|
"5 6\n",
|
|
|
|
|
"6 2\n",
|
|
|
|
|
"więcej niż 10 1\n",
|
|
|
|
|
"8 1\n",
|
|
|
|
|
"Name: count, dtype: int64\n",
|
|
|
|
|
"Value counts after changes:\n",
|
|
|
|
|
" 5\n",
|
|
|
|
|
"2 175\n",
|
|
|
|
|
"3 143\n",
|
|
|
|
|
"4 50\n",
|
|
|
|
|
"1 40\n",
|
|
|
|
|
"5 6\n",
|
|
|
|
|
"6 2\n",
|
|
|
|
|
"10 1\n",
|
|
|
|
|
"8 1\n",
|
|
|
|
|
"Name: count, dtype: int64\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"\n",
"# Preprocessing column 15 (1-based position; 0-based column label 14):\n",
"print(\"COLUMN 15:\")\n",
"# Count the occurrences of each unique value before preprocessing:\n",
"print(\"Value counts before changes:\\n\",final_test_dataset[14].value_counts())\n",
"\n",
"# Map the text labels to numbers (the poddasze label becomes NaN), then cast to float:\n",
"final_test_dataset[14] = final_test_dataset[14].replace({\"parter\": 0, \"suterena\": -1, \"> 10\": 11, \"poddasze\": np.nan})\n",
"final_test_dataset[14] = final_test_dataset[14].astype(float)\n",
"\n",
"# Fill NaNs with the column median. The result is assigned back instead of\n",
"# calling fillna(inplace=True) on the selected column: that chained in-place\n",
"# form is deprecated and leaves the frame unchanged under pandas Copy-on-Write.\n",
"final_test_dataset[14] = final_test_dataset[14].fillna(final_test_dataset[14].median())\n",
"final_test_dataset[14] = final_test_dataset[14].astype(int)\n",
"\n",
"# Count the occurrences of each unique value after preprocessing:\n",
"print(\"Value counts after changes:\\n\",final_test_dataset[14].value_counts())\n",
"\n",
"# Preprocessing column 8 (1-based position; 0-based column label 7):\n",
"print(\"COLUMN 8:\")\n",
"# Replace strings containing a space with NaN:\n",
"final_test_dataset[7] = final_test_dataset[7].replace(' ', np.nan, regex=True)\n",
"\n",
"# Cast to float, then fill NaNs with the column median (assigned back, see above):\n",
"final_test_dataset[7] = final_test_dataset[7].astype(float)\n",
"final_test_dataset[7] = final_test_dataset[7].fillna(final_test_dataset[7].median())\n",
"\n",
"print(final_test_dataset[7])\n",
"\n",
"# Preprocessing column 6 (1-based position; 0-based column label 5):\n",
"print(\"COLUMN 6:\")\n",
"# Count the occurrences of each unique value before preprocessing:\n",
"print(\"Value counts before changes:\\n\",final_test_dataset[5].value_counts())\n",
"\n",
"# Map the one text label to 10, then cast the column to int:\n",
"final_test_dataset[5] = final_test_dataset[5].replace({\"więcej niż 10\": 10})\n",
"final_test_dataset[5] = final_test_dataset[5].astype(int)\n",
"\n",
"# Count the occurrences of each unique value after preprocessing:\n",
"print(\"Value counts after changes:\\n\",final_test_dataset[5].value_counts())\n",
"\n",
"# Column label 9: cast to float first so the median is computed on numeric\n",
"# data (same order as the columns above), then fill NaNs with that median:\n",
"final_test_dataset[9] = final_test_dataset[9].astype(float)\n",
"final_test_dataset[9] = final_test_dataset[9].fillna(final_test_dataset[9].median())\n",
"\n",
"# Keep only the four preprocessed columns:\n",
"final_test_dataset = final_test_dataset[[5,7,9,14]]"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 319,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>5</th>\n",
|
|
|
|
|
" <th>7</th>\n",
|
|
|
|
|
" <th>9</th>\n",
|
|
|
|
|
" <th>14</th>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>61.99</td>\n",
|
|
|
|
|
" <td>7.0</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>64.00</td>\n",
|
|
|
|
|
" <td>4.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>51.15</td>\n",
|
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
|
" <td>0</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>45.77</td>\n",
|
|
|
|
|
" <td>7.0</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>44.36</td>\n",
|
|
|
|
|
" <td>13.0</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>413</th>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>34.97</td>\n",
|
|
|
|
|
" <td>8.0</td>\n",
|
|
|
|
|
" <td>4</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>414</th>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>49.06</td>\n",
|
|
|
|
|
" <td>3.0</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>415</th>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>76.71</td>\n",
|
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>416</th>\n",
|
|
|
|
|
" <td>3</td>\n",
|
|
|
|
|
" <td>72.63</td>\n",
|
|
|
|
|
" <td>5.0</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <th>417</th>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" <td>2</td>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" <td>65.84</td>\n",
|
|
|
|
|
" <td>10.0</td>\n",
|
|
|
|
|
" <td>3</td>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"<p>418 rows × 4 columns</p>\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"</div>"
|
|
|
|
|
],
|
|
|
|
|
"text/plain": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" 5 7 9 14\n",
|
|
|
|
|
"0 3 61.99 7.0 2\n",
|
|
|
|
|
"1 4 64.00 4.0 0\n",
|
|
|
|
|
"2 3 51.15 5.0 0\n",
|
|
|
|
|
"3 2 45.77 7.0 2\n",
|
|
|
|
|
"4 2 44.36 13.0 5\n",
|
|
|
|
|
".. .. ... ... ..\n",
|
|
|
|
|
"413 1 34.97 8.0 4\n",
|
|
|
|
|
"414 3 49.06 3.0 3\n",
|
|
|
|
|
"415 3 76.71 5.0 3\n",
|
|
|
|
|
"416 3 72.63 5.0 3\n",
|
|
|
|
|
"417 2 65.84 10.0 3\n",
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"\n",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"[418 rows x 4 columns]"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 319,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"final_test_dataset"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 320,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"final_results = reg.predict(final_test_dataset)"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 321,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"data": {
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"text/html": [
|
|
|
|
|
"<div>\n",
|
|
|
|
|
"<style scoped>\n",
|
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
|
" text-align: right;\n",
|
|
|
|
|
" }\n",
|
|
|
|
|
"</style>\n",
|
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
|
" <thead>\n",
|
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
|
" <th></th>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </thead>\n",
|
|
|
|
|
" <tbody>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>0</th>\n",
|
|
|
|
|
" <td>426282.351904</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>1</th>\n",
|
|
|
|
|
" <td>389890.897311</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>2</th>\n",
|
|
|
|
|
" <td>334372.288463</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>3</th>\n",
|
|
|
|
|
" <td>341143.667679</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>4</th>\n",
|
|
|
|
|
" <td>346709.875023</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>...</th>\n",
|
|
|
|
|
" <td>...</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>413</th>\n",
|
|
|
|
|
" <td>301974.734528</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>414</th>\n",
|
|
|
|
|
" <td>312195.369919</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>415</th>\n",
|
|
|
|
|
" <td>537901.937976</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>416</th>\n",
|
|
|
|
|
" <td>505420.685819</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>417</th>\n",
|
|
|
|
|
" <td>509311.081663</td>\n",
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" </tbody>\n",
|
|
|
|
|
"</table>\n",
|
|
|
|
|
"<p>418 rows × 1 columns</p>\n",
|
|
|
|
|
"</div>"
|
|
|
|
|
],
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"text/plain": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
" 0\n",
|
|
|
|
|
"0 426282.351904\n",
|
|
|
|
|
"1 389890.897311\n",
|
|
|
|
|
"2 334372.288463\n",
|
|
|
|
|
"3 341143.667679\n",
|
|
|
|
|
"4 346709.875023\n",
|
|
|
|
|
".. ...\n",
|
|
|
|
|
"413 301974.734528\n",
|
|
|
|
|
"414 312195.369919\n",
|
|
|
|
|
"415 537901.937976\n",
|
|
|
|
|
"416 505420.685819\n",
|
|
|
|
|
"417 509311.081663\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"[418 rows x 1 columns]"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 321,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"pd.DataFrame(final_results)"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"execution_count": 322,
|
2023-10-17 17:35:43 +02:00
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"pd.DataFrame(final_results).to_csv(\"./test-A/out.tsv\", sep='\\t', index=False, header=None)"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|
"kernelspec": {
|
|
|
|
|
"display_name": "Python 3",
|
|
|
|
|
"language": "python",
|
|
|
|
|
"name": "python3"
|
|
|
|
|
},
|
|
|
|
|
"language_info": {
|
|
|
|
|
"codemirror_mode": {
|
|
|
|
|
"name": "ipython",
|
|
|
|
|
"version": 3
|
|
|
|
|
},
|
|
|
|
|
"file_extension": ".py",
|
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
2023-10-28 14:25:18 +02:00
|
|
|
|
"version": "3.10.0"
|
2023-10-17 17:35:43 +02:00
|
|
|
|
},
|
|
|
|
|
"vscode": {
|
|
|
|
|
"interpreter": {
|
|
|
|
|
"hash": "1b132c2ed43285dcf39f6d01712959169a14a721cf314fe69015adab49bb1fd1"
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|
"nbformat_minor": 2
|
|
|
|
|
}
|