Add script
This commit is contained in:
parent
d087d791a5
commit
b680bf88e1
@ -1,6 +0,0 @@
|
||||
{
|
||||
"cells": [],
|
||||
"metadata": {},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,471 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b14199d0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip3 install --upgrade pip\u001b[0m\n",
|
||||
"Downloading property-salesmelbourne-city.zip to /Users/mmoryl/Projects/UAM/ium_s487183\n",
|
||||
" 0%| | 0.00/589k [00:00<?, ?B/s]\n",
|
||||
"100%|████████████████████████████████████████| 589k/589k [00:00<00:00, 7.61MB/s]\n",
|
||||
"Archive: property-salesmelbourne-city.zip\n",
|
||||
" inflating: data/Property Sales of Melbourne City.csv \n",
|
||||
"Property Sales of Melbourne City.csv\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip3 install -q kaggle\n",
|
||||
"!kaggle datasets download amalab182/property-salesmelbourne-city\n",
|
||||
"!mkdir -p data\n",
|
||||
"!unzip -o property-salesmelbourne-city.zip -d data\n",
|
||||
"!rm property-salesmelbourne-city.zip\n",
|
||||
"!ls data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "10a21817",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: pandas in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (1.5.3)\n",
|
||||
"Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (from pandas) (2023.2)\n",
|
||||
"Requirement already satisfied: python-dateutil>=2.8.1 in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (from pandas) (2.8.2)\n",
|
||||
"Requirement already satisfied: numpy>=1.21.0 in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (from pandas) (1.24.2)\n",
|
||||
"Requirement already satisfied: six>=1.5 in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
|
||||
"\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip3 install --upgrade pip\u001b[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>Unnamed: 0</th>\n",
|
||||
" <th>Suburb</th>\n",
|
||||
" <th>Address</th>\n",
|
||||
" <th>Rooms</th>\n",
|
||||
" <th>Type</th>\n",
|
||||
" <th>Price</th>\n",
|
||||
" <th>Method</th>\n",
|
||||
" <th>SellerG</th>\n",
|
||||
" <th>Date</th>\n",
|
||||
" <th>Distance</th>\n",
|
||||
" <th>...</th>\n",
|
||||
" <th>Bathroom</th>\n",
|
||||
" <th>Car</th>\n",
|
||||
" <th>Landsize</th>\n",
|
||||
" <th>BuildingArea</th>\n",
|
||||
" <th>YearBuilt</th>\n",
|
||||
" <th>CouncilArea</th>\n",
|
||||
" <th>Lattitude</th>\n",
|
||||
" <th>Longtitude</th>\n",
|
||||
" <th>Regionname</th>\n",
|
||||
" <th>Propertycount</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Abbotsford</td>\n",
|
||||
" <td>85 Turner St</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>1480000</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>Biggin</td>\n",
|
||||
" <td>3/12/2016</td>\n",
|
||||
" <td>2.5</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>202.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Yarra</td>\n",
|
||||
" <td>-37.79960</td>\n",
|
||||
" <td>144.99840</td>\n",
|
||||
" <td>Northern Metropolitan</td>\n",
|
||||
" <td>4019.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>Abbotsford</td>\n",
|
||||
" <td>25 Bloomburg St</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>1035000</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>Biggin</td>\n",
|
||||
" <td>4/02/2016</td>\n",
|
||||
" <td>2.5</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>156.0</td>\n",
|
||||
" <td>79.0</td>\n",
|
||||
" <td>1900.0</td>\n",
|
||||
" <td>Yarra</td>\n",
|
||||
" <td>-37.80790</td>\n",
|
||||
" <td>144.99340</td>\n",
|
||||
" <td>Northern Metropolitan</td>\n",
|
||||
" <td>4019.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>Abbotsford</td>\n",
|
||||
" <td>5 Charles St</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>1465000</td>\n",
|
||||
" <td>SP</td>\n",
|
||||
" <td>Biggin</td>\n",
|
||||
" <td>4/03/2017</td>\n",
|
||||
" <td>2.5</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>134.0</td>\n",
|
||||
" <td>150.0</td>\n",
|
||||
" <td>1900.0</td>\n",
|
||||
" <td>Yarra</td>\n",
|
||||
" <td>-37.80930</td>\n",
|
||||
" <td>144.99440</td>\n",
|
||||
" <td>Northern Metropolitan</td>\n",
|
||||
" <td>4019.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>Abbotsford</td>\n",
|
||||
" <td>40 Federation La</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>850000</td>\n",
|
||||
" <td>PI</td>\n",
|
||||
" <td>Biggin</td>\n",
|
||||
" <td>4/03/2017</td>\n",
|
||||
" <td>2.5</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>94.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>Yarra</td>\n",
|
||||
" <td>-37.79690</td>\n",
|
||||
" <td>144.99690</td>\n",
|
||||
" <td>Northern Metropolitan</td>\n",
|
||||
" <td>4019.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>Abbotsford</td>\n",
|
||||
" <td>55a Park St</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>1600000</td>\n",
|
||||
" <td>VB</td>\n",
|
||||
" <td>Nelson</td>\n",
|
||||
" <td>4/06/2016</td>\n",
|
||||
" <td>2.5</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>120.0</td>\n",
|
||||
" <td>142.0</td>\n",
|
||||
" <td>2014.0</td>\n",
|
||||
" <td>Yarra</td>\n",
|
||||
" <td>-37.80720</td>\n",
|
||||
" <td>144.99410</td>\n",
|
||||
" <td>Northern Metropolitan</td>\n",
|
||||
" <td>4019.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>18391</th>\n",
|
||||
" <td>23540</td>\n",
|
||||
" <td>Williamstown</td>\n",
|
||||
" <td>8/2 Thompson St</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>t</td>\n",
|
||||
" <td>622500</td>\n",
|
||||
" <td>SP</td>\n",
|
||||
" <td>Greg</td>\n",
|
||||
" <td>26/08/2017</td>\n",
|
||||
" <td>6.8</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>89.0</td>\n",
|
||||
" <td>2010.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>-37.86393</td>\n",
|
||||
" <td>144.90484</td>\n",
|
||||
" <td>Western Metropolitan</td>\n",
|
||||
" <td>6380.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>18392</th>\n",
|
||||
" <td>23541</td>\n",
|
||||
" <td>Williamstown</td>\n",
|
||||
" <td>96 Verdon St</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>2500000</td>\n",
|
||||
" <td>PI</td>\n",
|
||||
" <td>Sweeney</td>\n",
|
||||
" <td>26/08/2017</td>\n",
|
||||
" <td>6.8</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>866.0</td>\n",
|
||||
" <td>157.0</td>\n",
|
||||
" <td>1920.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>-37.85908</td>\n",
|
||||
" <td>144.89299</td>\n",
|
||||
" <td>Western Metropolitan</td>\n",
|
||||
" <td>6380.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>18393</th>\n",
|
||||
" <td>23544</td>\n",
|
||||
" <td>Yallambie</td>\n",
|
||||
" <td>17 Amaroo Wy</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>1100000</td>\n",
|
||||
" <td>S</td>\n",
|
||||
" <td>Buckingham</td>\n",
|
||||
" <td>26/08/2017</td>\n",
|
||||
" <td>12.7</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>-37.72006</td>\n",
|
||||
" <td>145.10547</td>\n",
|
||||
" <td>Northern Metropolitan</td>\n",
|
||||
" <td>1369.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>18394</th>\n",
|
||||
" <td>23545</td>\n",
|
||||
" <td>Yarraville</td>\n",
|
||||
" <td>6 Agnes St</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>1285000</td>\n",
|
||||
" <td>SP</td>\n",
|
||||
" <td>Village</td>\n",
|
||||
" <td>26/08/2017</td>\n",
|
||||
" <td>6.3</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>362.0</td>\n",
|
||||
" <td>112.0</td>\n",
|
||||
" <td>1920.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>-37.81188</td>\n",
|
||||
" <td>144.88449</td>\n",
|
||||
" <td>Western Metropolitan</td>\n",
|
||||
" <td>6543.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>18395</th>\n",
|
||||
" <td>23546</td>\n",
|
||||
" <td>Yarraville</td>\n",
|
||||
" <td>33 Freeman St</td>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>h</td>\n",
|
||||
" <td>1050000</td>\n",
|
||||
" <td>VB</td>\n",
|
||||
" <td>Village</td>\n",
|
||||
" <td>26/08/2017</td>\n",
|
||||
" <td>6.3</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>139.0</td>\n",
|
||||
" <td>1950.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>-37.81829</td>\n",
|
||||
" <td>144.87404</td>\n",
|
||||
" <td>Western Metropolitan</td>\n",
|
||||
" <td>6543.0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>18396 rows × 22 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" Unnamed: 0 Suburb Address Rooms Type Price Method \\\n",
|
||||
"0 1 Abbotsford 85 Turner St 2 h 1480000 S \n",
|
||||
"1 2 Abbotsford 25 Bloomburg St 2 h 1035000 S \n",
|
||||
"2 4 Abbotsford 5 Charles St 3 h 1465000 SP \n",
|
||||
"3 5 Abbotsford 40 Federation La 3 h 850000 PI \n",
|
||||
"4 6 Abbotsford 55a Park St 4 h 1600000 VB \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"18391 23540 Williamstown 8/2 Thompson St 2 t 622500 SP \n",
|
||||
"18392 23541 Williamstown 96 Verdon St 4 h 2500000 PI \n",
|
||||
"18393 23544 Yallambie 17 Amaroo Wy 4 h 1100000 S \n",
|
||||
"18394 23545 Yarraville 6 Agnes St 4 h 1285000 SP \n",
|
||||
"18395 23546 Yarraville 33 Freeman St 4 h 1050000 VB \n",
|
||||
"\n",
|
||||
" SellerG Date Distance ... Bathroom Car Landsize \\\n",
|
||||
"0 Biggin 3/12/2016 2.5 ... 1.0 1.0 202.0 \n",
|
||||
"1 Biggin 4/02/2016 2.5 ... 1.0 0.0 156.0 \n",
|
||||
"2 Biggin 4/03/2017 2.5 ... 2.0 0.0 134.0 \n",
|
||||
"3 Biggin 4/03/2017 2.5 ... 2.0 1.0 94.0 \n",
|
||||
"4 Nelson 4/06/2016 2.5 ... 1.0 2.0 120.0 \n",
|
||||
"... ... ... ... ... ... ... ... \n",
|
||||
"18391 Greg 26/08/2017 6.8 ... 2.0 1.0 NaN \n",
|
||||
"18392 Sweeney 26/08/2017 6.8 ... 1.0 5.0 866.0 \n",
|
||||
"18393 Buckingham 26/08/2017 12.7 ... 3.0 2.0 NaN \n",
|
||||
"18394 Village 26/08/2017 6.3 ... 1.0 1.0 362.0 \n",
|
||||
"18395 Village 26/08/2017 6.3 ... 2.0 2.0 NaN \n",
|
||||
"\n",
|
||||
" BuildingArea YearBuilt CouncilArea Lattitude Longtitude \\\n",
|
||||
"0 NaN NaN Yarra -37.79960 144.99840 \n",
|
||||
"1 79.0 1900.0 Yarra -37.80790 144.99340 \n",
|
||||
"2 150.0 1900.0 Yarra -37.80930 144.99440 \n",
|
||||
"3 NaN NaN Yarra -37.79690 144.99690 \n",
|
||||
"4 142.0 2014.0 Yarra -37.80720 144.99410 \n",
|
||||
"... ... ... ... ... ... \n",
|
||||
"18391 89.0 2010.0 NaN -37.86393 144.90484 \n",
|
||||
"18392 157.0 1920.0 NaN -37.85908 144.89299 \n",
|
||||
"18393 NaN NaN NaN -37.72006 145.10547 \n",
|
||||
"18394 112.0 1920.0 NaN -37.81188 144.88449 \n",
|
||||
"18395 139.0 1950.0 NaN -37.81829 144.87404 \n",
|
||||
"\n",
|
||||
" Regionname Propertycount \n",
|
||||
"0 Northern Metropolitan 4019.0 \n",
|
||||
"1 Northern Metropolitan 4019.0 \n",
|
||||
"2 Northern Metropolitan 4019.0 \n",
|
||||
"3 Northern Metropolitan 4019.0 \n",
|
||||
"4 Northern Metropolitan 4019.0 \n",
|
||||
"... ... ... \n",
|
||||
"18391 Western Metropolitan 6380.0 \n",
|
||||
"18392 Western Metropolitan 6380.0 \n",
|
||||
"18393 Northern Metropolitan 1369.0 \n",
|
||||
"18394 Western Metropolitan 6543.0 \n",
|
||||
"18395 Western Metropolitan 6543.0 \n",
|
||||
"\n",
|
||||
"[18396 rows x 22 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip3 install pandas\n",
|
||||
"import pandas as pd\n",
|
||||
"sells = pd.read_csv('data/Property Sales of Melbourne City.csv')\n",
|
||||
"sells\n",
|
||||
"# sells[\"Car Model\"].value_counts()\n",
|
||||
"# len(sells.index)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0768cc2e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip3 install scikit-learn\n",
|
||||
"from sklearn.model_selection import train_test_split"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
44
prepare-dataset.py
Normal file
44
prepare-dataset.py
Normal file
@ -0,0 +1,44 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# Prepare the Melbourne property-sales dataset for modelling:
# load the raw CSV, drop unused columns and incomplete rows, scale selected
# numeric columns by their maximum, split into train/validation/test subsets
# and write each subset to its own CSV file.

# Location of the raw dataset, relative to the working directory.
INPUT_CSV = 'data/Property Sales of Melbourne City.csv'

# Columns removed before any further processing.
COLUMNS_TO_DROP = [
    'Lattitude',
    'Longtitude',
    'CouncilArea',
    'Propertycount',
    'Method',
    'SellerG',
    'Date',
    'Postcode',
    'Bedroom2',
    'Bathroom',
    'Car',
    'BuildingArea',
    'Address',
]

# Columns divided by their own maximum value.
COLUMNS_TO_NORMALIZE = ['Price', 'Landsize', 'Distance']

# Column separated from the features and written to the Y_*.csv files.
TARGET_COLUMN = 'Price'

# Seed shared by both train_test_split calls so the split is reproducible.
RANDOM_STATE = 1


def prepare_features(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned, max-scaled copy of the raw sales table.

    Removes every column listed in ``COLUMNS_TO_DROP``, then drops each row
    that still contains a NaN, then divides every column listed in
    ``COLUMNS_TO_NORMALIZE`` by that column's maximum. ``raw`` itself is not
    modified.

    Raises:
        KeyError: if ``raw`` lacks one of the ``COLUMNS_TO_DROP`` columns
            (behaviour of ``DataFrame.drop``).
    """
    # NOTE(review): pandas labels a header-less leading CSV column
    # 'Unnamed: 0'. If the input file has one, it is not in COLUMNS_TO_DROP
    # and therefore stays in the feature set -- confirm whether that is wanted.
    prepared = raw.drop(columns=COLUMNS_TO_DROP).dropna()

    # NOTE(review): the maxima are taken over the whole table, i.e. before the
    # train/val/test split, so validation and test rows influence the scaling
    # of the training rows. Kept as-is to leave the written files unchanged.
    for column in COLUMNS_TO_NORMALIZE:
        prepared[column] = prepared[column] / prepared[column].max()
    return prepared


def split_features_target(prepared: pd.DataFrame, target: str = TARGET_COLUMN):
    """Separate ``prepared`` into ``(features, target_series)``.

    ``features`` holds every column except ``target`` in the original column
    order; ``target_series`` is the ``target`` column. ``prepared`` is left
    untouched.
    """
    features = prepared.drop(columns=[target])
    return features, prepared[target]


def main(input_path: str = INPUT_CSV, output_dir: str = '.') -> None:
    """Run the full pipeline and write the six subset files.

    Reads ``input_path``, prepares it, splits it 70% / 15% / 15% into
    train / validation / test and writes ``X_train.csv``, ``X_val.csv``,
    ``X_test.csv``, ``Y_train.csv``, ``Y_val.csv`` and ``Y_test.csv`` into
    ``output_dir`` without the index column.
    """
    import os  # local import: only needed to build the output paths

    prepared = prepare_features(pd.read_csv(input_path))
    X, Y = split_features_target(prepared)

    # First hold out 30% of the rows, then halve that hold-out into
    # validation and test.
    X_train, X_temp, Y_train, Y_temp = train_test_split(
        X, Y, test_size=0.3, random_state=RANDOM_STATE
    )
    X_val, X_test, Y_val, Y_test = train_test_split(
        X_temp, Y_temp, test_size=0.5, random_state=RANDOM_STATE
    )

    subsets = {
        'X_train': X_train,
        'X_val': X_val,
        'X_test': X_test,
        'Y_train': Y_train,
        'Y_val': Y_val,
        'Y_test': Y_test,
    }
    for name, subset in subsets.items():
        subset.to_csv(os.path.join(output_dir, name + '.csv'), index=False)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user