ium_z487183/IUM_project.ipynb
2023-03-25 13:39:50 +01:00

472 lines
18 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"id": "b14199d0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip3 install --upgrade pip\u001b[0m\n",
"Downloading property-salesmelbourne-city.zip to /Users/mmoryl/Projects/UAM/ium_s487183\n",
" 0%| | 0.00/589k [00:00<?, ?B/s]\n",
"100%|████████████████████████████████████████| 589k/589k [00:00<00:00, 7.61MB/s]\n",
"Archive: property-salesmelbourne-city.zip\n",
" inflating: data/Property Sales of Melbourne City.csv \n",
"Property Sales of Melbourne City.csv\n"
]
}
],
"source": [
"# Install the Kaggle CLI with %pip so it lands in the running kernel's environment\n",
"# (a bare !pip3 uses whichever pip3 is first on the shell PATH).\n",
"%pip install -q kaggle\n",
"# Download the dataset archive, unpack it into data/, remove the archive, list the result.\n",
"!kaggle datasets download amalab182/property-salesmelbourne-city\n",
"!mkdir -p data\n",
"!unzip -o property-salesmelbourne-city.zip -d data\n",
"!rm property-salesmelbourne-city.zip\n",
"!ls data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "10a21817",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: pandas in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (1.5.3)\n",
"Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (from pandas) (2023.2)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: numpy>=1.21.0 in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (from pandas) (1.24.2)\n",
"Requirement already satisfied: six>=1.5 in /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
"\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.0.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip3 install --upgrade pip\u001b[0m\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Unnamed: 0</th>\n",
" <th>Suburb</th>\n",
" <th>Address</th>\n",
" <th>Rooms</th>\n",
" <th>Type</th>\n",
" <th>Price</th>\n",
" <th>Method</th>\n",
" <th>SellerG</th>\n",
" <th>Date</th>\n",
" <th>Distance</th>\n",
" <th>...</th>\n",
" <th>Bathroom</th>\n",
" <th>Car</th>\n",
" <th>Landsize</th>\n",
" <th>BuildingArea</th>\n",
" <th>YearBuilt</th>\n",
" <th>CouncilArea</th>\n",
" <th>Lattitude</th>\n",
" <th>Longtitude</th>\n",
" <th>Regionname</th>\n",
" <th>Propertycount</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>Abbotsford</td>\n",
" <td>85 Turner St</td>\n",
" <td>2</td>\n",
" <td>h</td>\n",
" <td>1480000</td>\n",
" <td>S</td>\n",
" <td>Biggin</td>\n",
" <td>3/12/2016</td>\n",
" <td>2.5</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>202.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Yarra</td>\n",
" <td>-37.79960</td>\n",
" <td>144.99840</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>Abbotsford</td>\n",
" <td>25 Bloomburg St</td>\n",
" <td>2</td>\n",
" <td>h</td>\n",
" <td>1035000</td>\n",
" <td>S</td>\n",
" <td>Biggin</td>\n",
" <td>4/02/2016</td>\n",
" <td>2.5</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>156.0</td>\n",
" <td>79.0</td>\n",
" <td>1900.0</td>\n",
" <td>Yarra</td>\n",
" <td>-37.80790</td>\n",
" <td>144.99340</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4</td>\n",
" <td>Abbotsford</td>\n",
" <td>5 Charles St</td>\n",
" <td>3</td>\n",
" <td>h</td>\n",
" <td>1465000</td>\n",
" <td>SP</td>\n",
" <td>Biggin</td>\n",
" <td>4/03/2017</td>\n",
" <td>2.5</td>\n",
" <td>...</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>134.0</td>\n",
" <td>150.0</td>\n",
" <td>1900.0</td>\n",
" <td>Yarra</td>\n",
" <td>-37.80930</td>\n",
" <td>144.99440</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5</td>\n",
" <td>Abbotsford</td>\n",
" <td>40 Federation La</td>\n",
" <td>3</td>\n",
" <td>h</td>\n",
" <td>850000</td>\n",
" <td>PI</td>\n",
" <td>Biggin</td>\n",
" <td>4/03/2017</td>\n",
" <td>2.5</td>\n",
" <td>...</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>94.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Yarra</td>\n",
" <td>-37.79690</td>\n",
" <td>144.99690</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>6</td>\n",
" <td>Abbotsford</td>\n",
" <td>55a Park St</td>\n",
" <td>4</td>\n",
" <td>h</td>\n",
" <td>1600000</td>\n",
" <td>VB</td>\n",
" <td>Nelson</td>\n",
" <td>4/06/2016</td>\n",
" <td>2.5</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>120.0</td>\n",
" <td>142.0</td>\n",
" <td>2014.0</td>\n",
" <td>Yarra</td>\n",
" <td>-37.80720</td>\n",
" <td>144.99410</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>4019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18391</th>\n",
" <td>23540</td>\n",
" <td>Williamstown</td>\n",
" <td>8/2 Thompson St</td>\n",
" <td>2</td>\n",
" <td>t</td>\n",
" <td>622500</td>\n",
" <td>SP</td>\n",
" <td>Greg</td>\n",
" <td>26/08/2017</td>\n",
" <td>6.8</td>\n",
" <td>...</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>89.0</td>\n",
" <td>2010.0</td>\n",
" <td>NaN</td>\n",
" <td>-37.86393</td>\n",
" <td>144.90484</td>\n",
" <td>Western Metropolitan</td>\n",
" <td>6380.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18392</th>\n",
" <td>23541</td>\n",
" <td>Williamstown</td>\n",
" <td>96 Verdon St</td>\n",
" <td>4</td>\n",
" <td>h</td>\n",
" <td>2500000</td>\n",
" <td>PI</td>\n",
" <td>Sweeney</td>\n",
" <td>26/08/2017</td>\n",
" <td>6.8</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>866.0</td>\n",
" <td>157.0</td>\n",
" <td>1920.0</td>\n",
" <td>NaN</td>\n",
" <td>-37.85908</td>\n",
" <td>144.89299</td>\n",
" <td>Western Metropolitan</td>\n",
" <td>6380.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18393</th>\n",
" <td>23544</td>\n",
" <td>Yallambie</td>\n",
" <td>17 Amaroo Wy</td>\n",
" <td>4</td>\n",
" <td>h</td>\n",
" <td>1100000</td>\n",
" <td>S</td>\n",
" <td>Buckingham</td>\n",
" <td>26/08/2017</td>\n",
" <td>12.7</td>\n",
" <td>...</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>-37.72006</td>\n",
" <td>145.10547</td>\n",
" <td>Northern Metropolitan</td>\n",
" <td>1369.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18394</th>\n",
" <td>23545</td>\n",
" <td>Yarraville</td>\n",
" <td>6 Agnes St</td>\n",
" <td>4</td>\n",
" <td>h</td>\n",
" <td>1285000</td>\n",
" <td>SP</td>\n",
" <td>Village</td>\n",
" <td>26/08/2017</td>\n",
" <td>6.3</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>362.0</td>\n",
" <td>112.0</td>\n",
" <td>1920.0</td>\n",
" <td>NaN</td>\n",
" <td>-37.81188</td>\n",
" <td>144.88449</td>\n",
" <td>Western Metropolitan</td>\n",
" <td>6543.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18395</th>\n",
" <td>23546</td>\n",
" <td>Yarraville</td>\n",
" <td>33 Freeman St</td>\n",
" <td>4</td>\n",
" <td>h</td>\n",
" <td>1050000</td>\n",
" <td>VB</td>\n",
" <td>Village</td>\n",
" <td>26/08/2017</td>\n",
" <td>6.3</td>\n",
" <td>...</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>139.0</td>\n",
" <td>1950.0</td>\n",
" <td>NaN</td>\n",
" <td>-37.81829</td>\n",
" <td>144.87404</td>\n",
" <td>Western Metropolitan</td>\n",
" <td>6543.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>18396 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0 Suburb Address Rooms Type Price Method \\\n",
"0 1 Abbotsford 85 Turner St 2 h 1480000 S \n",
"1 2 Abbotsford 25 Bloomburg St 2 h 1035000 S \n",
"2 4 Abbotsford 5 Charles St 3 h 1465000 SP \n",
"3 5 Abbotsford 40 Federation La 3 h 850000 PI \n",
"4 6 Abbotsford 55a Park St 4 h 1600000 VB \n",
"... ... ... ... ... ... ... ... \n",
"18391 23540 Williamstown 8/2 Thompson St 2 t 622500 SP \n",
"18392 23541 Williamstown 96 Verdon St 4 h 2500000 PI \n",
"18393 23544 Yallambie 17 Amaroo Wy 4 h 1100000 S \n",
"18394 23545 Yarraville 6 Agnes St 4 h 1285000 SP \n",
"18395 23546 Yarraville 33 Freeman St 4 h 1050000 VB \n",
"\n",
" SellerG Date Distance ... Bathroom Car Landsize \\\n",
"0 Biggin 3/12/2016 2.5 ... 1.0 1.0 202.0 \n",
"1 Biggin 4/02/2016 2.5 ... 1.0 0.0 156.0 \n",
"2 Biggin 4/03/2017 2.5 ... 2.0 0.0 134.0 \n",
"3 Biggin 4/03/2017 2.5 ... 2.0 1.0 94.0 \n",
"4 Nelson 4/06/2016 2.5 ... 1.0 2.0 120.0 \n",
"... ... ... ... ... ... ... ... \n",
"18391 Greg 26/08/2017 6.8 ... 2.0 1.0 NaN \n",
"18392 Sweeney 26/08/2017 6.8 ... 1.0 5.0 866.0 \n",
"18393 Buckingham 26/08/2017 12.7 ... 3.0 2.0 NaN \n",
"18394 Village 26/08/2017 6.3 ... 1.0 1.0 362.0 \n",
"18395 Village 26/08/2017 6.3 ... 2.0 2.0 NaN \n",
"\n",
" BuildingArea YearBuilt CouncilArea Lattitude Longtitude \\\n",
"0 NaN NaN Yarra -37.79960 144.99840 \n",
"1 79.0 1900.0 Yarra -37.80790 144.99340 \n",
"2 150.0 1900.0 Yarra -37.80930 144.99440 \n",
"3 NaN NaN Yarra -37.79690 144.99690 \n",
"4 142.0 2014.0 Yarra -37.80720 144.99410 \n",
"... ... ... ... ... ... \n",
"18391 89.0 2010.0 NaN -37.86393 144.90484 \n",
"18392 157.0 1920.0 NaN -37.85908 144.89299 \n",
"18393 NaN NaN NaN -37.72006 145.10547 \n",
"18394 112.0 1920.0 NaN -37.81188 144.88449 \n",
"18395 139.0 1950.0 NaN -37.81829 144.87404 \n",
"\n",
" Regionname Propertycount \n",
"0 Northern Metropolitan 4019.0 \n",
"1 Northern Metropolitan 4019.0 \n",
"2 Northern Metropolitan 4019.0 \n",
"3 Northern Metropolitan 4019.0 \n",
"4 Northern Metropolitan 4019.0 \n",
"... ... ... \n",
"18391 Western Metropolitan 6380.0 \n",
"18392 Western Metropolitan 6380.0 \n",
"18393 Northern Metropolitan 1369.0 \n",
"18394 Western Metropolitan 6543.0 \n",
"18395 Western Metropolitan 6543.0 \n",
"\n",
"[18396 rows x 22 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Install pandas with %pip so it targets the running kernel's environment.\n",
"%pip install pandas\n",
"import pandas as pd\n",
"\n",
"# Load the Melbourne property sales CSV from data/ and display the frame.\n",
"sells = pd.read_csv('data/Property Sales of Melbourne City.csv')\n",
"sells"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0768cc2e",
"metadata": {},
"outputs": [],
"source": [
"# Install scikit-learn with %pip so it targets the running kernel's environment.\n",
"%pip install scikit-learn\n",
"from sklearn.model_selection import train_test_split"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}