{ "cells": [ { "cell_type": "code", "execution_count": 316, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from statistics import mean,median\n", "import re\n", "import numpy as np\n", "from sklearn.metrics import mean_absolute_error\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Wczytanie datasetów" ] }, { "cell_type": "code", "execution_count": 223, "metadata": {}, "outputs": [], "source": [ "train_dataset = pd.read_csv(\"./train/train.tsv\", sep = \"\\t\", header=None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data exploration " ] }, { "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "8 | \n", "9 | \n", "... | \n", "16 | \n", "17 | \n", "18 | \n", "19 | \n", "20 | \n", "21 | \n", "22 | \n", "23 | \n", "24 | \n", "25 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "309000.0 | \n", "do zamieszkania | \n", "390 zł | \n", "spółdzielcze własnościowe | \n", "7113 | \n", "https://www.otodom.pl/oferta/niezalezny-uklad-... | \n", "2 | \n", "NaN | \n", "43.44 | \n", "wtórny | \n", "... | \n", "NaN | \n", "gazowe | \n", "plastikowe | \n", "NaN | \n", "NaN | \n", "NaN | \n", "cegła | \n", "Polecamy na sprzedaż dwupokojowe mieszkanie p... | \n", "NaN | \n", "telewizja kablowa, internet, meble, piwnica, g... | \n", "
1 rows × 26 columns
\n", "\n", " | 5 | \n", "7 | \n", "9 | \n", "14 | \n", "
---|---|---|---|---|
0 | \n", "3 | \n", "59.1 | \n", "4.0 | \n", "2 | \n", "
\n", " | 0 | \n", "
---|---|
0 | \n", "373000.00 | \n", "
1 | \n", "299000.00 | \n", "
2 | \n", "365000.00 | \n", "
3 | \n", "369000.00 | \n", "
4 | \n", "483791.00 | \n", "
... | \n", "... | \n", "
457 | \n", "655544.02 | \n", "
458 | \n", "471397.97 | \n", "
459 | \n", "309958.00 | \n", "
460 | \n", "699000.00 | \n", "
461 | \n", "850000.00 | \n", "
462 rows × 1 columns
\n", "\n", " | 5 | \n", "7 | \n", "9 | \n", "14 | \n", "
---|---|---|---|---|
0 | \n", "3 | \n", "61.99 | \n", "7.0 | \n", "2 | \n", "
1 | \n", "4 | \n", "64.00 | \n", "4.0 | \n", "0 | \n", "
2 | \n", "3 | \n", "51.15 | \n", "5.0 | \n", "0 | \n", "
3 | \n", "2 | \n", "45.77 | \n", "7.0 | \n", "2 | \n", "
4 | \n", "2 | \n", "44.36 | \n", "13.0 | \n", "5 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
413 | \n", "1 | \n", "34.97 | \n", "8.0 | \n", "4 | \n", "
414 | \n", "3 | \n", "49.06 | \n", "3.0 | \n", "3 | \n", "
415 | \n", "3 | \n", "76.71 | \n", "5.0 | \n", "3 | \n", "
416 | \n", "3 | \n", "72.63 | \n", "5.0 | \n", "3 | \n", "
417 | \n", "2 | \n", "65.84 | \n", "10.0 | \n", "3 | \n", "
418 rows × 4 columns
\n", "\n", " | 0 | \n", "
---|---|
0 | \n", "426282.351904 | \n", "
1 | \n", "389890.897311 | \n", "
2 | \n", "334372.288463 | \n", "
3 | \n", "341143.667679 | \n", "
4 | \n", "346709.875023 | \n", "
... | \n", "... | \n", "
413 | \n", "301974.734528 | \n", "
414 | \n", "312195.369919 | \n", "
415 | \n", "537901.937976 | \n", "
416 | \n", "505420.685819 | \n", "
417 | \n", "509311.081663 | \n", "
418 rows × 1 columns
\n", "