2310 lines
72 KiB
Plaintext
2310 lines
72 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"source": [
|
||
"# Analiza danych w Pythonie\n",
|
||
"\n",
|
||
"### Tomasz Dwojak\n",
|
||
"\n",
|
||
"### 3 czerwca 2018"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"source": [
|
||
"### Analiza danych:\n",
|
||
"\n",
|
||
" * R\n",
|
||
" * Python"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"source": [
|
||
"### Python Ekosystem\n",
|
||
"\n",
|
||
" * pandas: ramka danych\n",
|
||
" * sklearn: modele ML\n",
|
||
" * numpy: obliczenia\n",
|
||
" * matplotlib: wykresy"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "skip"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"%matplotlib inline"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"source": [
|
||
"### Typy danych\n",
|
||
"\n",
|
||
" * Szereg (`pd.Series`)\n",
|
||
" * Ramka danych (`pd.DataFrame`)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"source": [
|
||
"### Wczytanie danych"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "fragment"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"data = pd.read_csv(\"./data/iowa.csv.gz\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>MSSubClass</th>\n",
|
||
" <th>MSZoning</th>\n",
|
||
" <th>LotFrontage</th>\n",
|
||
" <th>LotArea</th>\n",
|
||
" <th>Street</th>\n",
|
||
" <th>Alley</th>\n",
|
||
" <th>LotShape</th>\n",
|
||
" <th>LandContour</th>\n",
|
||
" <th>Utilities</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PoolArea</th>\n",
|
||
" <th>PoolQC</th>\n",
|
||
" <th>Fence</th>\n",
|
||
" <th>MiscFeature</th>\n",
|
||
" <th>MiscVal</th>\n",
|
||
" <th>MoSold</th>\n",
|
||
" <th>YrSold</th>\n",
|
||
" <th>SaleType</th>\n",
|
||
" <th>SaleCondition</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>65.0</td>\n",
|
||
" <td>8450</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>208500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>9600</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2007</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>181500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>68.0</td>\n",
|
||
" <td>11250</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>223500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>9550</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2006</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Abnorml</td>\n",
|
||
" <td>140000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>84.0</td>\n",
|
||
" <td>14260</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>250000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 81 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
|
||
"0 1 60 RL 65.0 8450 Pave NaN Reg \n",
|
||
"1 2 20 RL 80.0 9600 Pave NaN Reg \n",
|
||
"2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
|
||
"3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
|
||
"4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
|
||
"\n",
|
||
" LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
|
||
"0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"\n",
|
||
" MoSold YrSold SaleType SaleCondition SalePrice \n",
|
||
"0 2 2008 WD Normal 208500 \n",
|
||
"1 5 2007 WD Normal 181500 \n",
|
||
"2 9 2008 WD Normal 223500 \n",
|
||
"3 2 2006 WD Abnorml 140000 \n",
|
||
"4 12 2008 WD Normal 250000 \n",
|
||
"\n",
|
||
"[5 rows x 81 columns]"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"(1460, 81)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"shape = data.shape\n",
|
||
"rows = shape[0]\n",
|
||
"cols = shape[1]\n",
|
||
"\n",
|
||
"print(rows, cols)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
"RangeIndex: 1460 entries, 0 to 1459\n",
|
||
"Data columns (total 81 columns):\n",
|
||
"Id 1460 non-null int64\n",
|
||
"MSSubClass 1460 non-null int64\n",
|
||
"MSZoning 1460 non-null object\n",
|
||
"LotFrontage 1201 non-null float64\n",
|
||
"LotArea 1460 non-null int64\n",
|
||
"Street 1460 non-null object\n",
|
||
"Alley 91 non-null object\n",
|
||
"LotShape 1460 non-null object\n",
|
||
"LandContour 1460 non-null object\n",
|
||
"Utilities 1460 non-null object\n",
|
||
"LotConfig 1460 non-null object\n",
|
||
"LandSlope 1460 non-null object\n",
|
||
"Neighborhood 1460 non-null object\n",
|
||
"Condition1 1460 non-null object\n",
|
||
"Condition2 1460 non-null object\n",
|
||
"BldgType 1460 non-null object\n",
|
||
"HouseStyle 1460 non-null object\n",
|
||
"OverallQual 1460 non-null int64\n",
|
||
"OverallCond 1460 non-null int64\n",
|
||
"YearBuilt 1460 non-null int64\n",
|
||
"YearRemodAdd 1460 non-null int64\n",
|
||
"RoofStyle 1460 non-null object\n",
|
||
"RoofMatl 1460 non-null object\n",
|
||
"Exterior1st 1460 non-null object\n",
|
||
"Exterior2nd 1460 non-null object\n",
|
||
"MasVnrType 1452 non-null object\n",
|
||
"MasVnrArea 1452 non-null float64\n",
|
||
"ExterQual 1460 non-null object\n",
|
||
"ExterCond 1460 non-null object\n",
|
||
"Foundation 1460 non-null object\n",
|
||
"BsmtQual 1423 non-null object\n",
|
||
"BsmtCond 1423 non-null object\n",
|
||
"BsmtExposure 1422 non-null object\n",
|
||
"BsmtFinType1 1423 non-null object\n",
|
||
"BsmtFinSF1 1460 non-null int64\n",
|
||
"BsmtFinType2 1422 non-null object\n",
|
||
"BsmtFinSF2 1460 non-null int64\n",
|
||
"BsmtUnfSF 1460 non-null int64\n",
|
||
"TotalBsmtSF 1460 non-null int64\n",
|
||
"Heating 1460 non-null object\n",
|
||
"HeatingQC 1460 non-null object\n",
|
||
"CentralAir 1460 non-null object\n",
|
||
"Electrical 1459 non-null object\n",
|
||
"1stFlrSF 1460 non-null int64\n",
|
||
"2ndFlrSF 1460 non-null int64\n",
|
||
"LowQualFinSF 1460 non-null int64\n",
|
||
"GrLivArea 1460 non-null int64\n",
|
||
"BsmtFullBath 1460 non-null int64\n",
|
||
"BsmtHalfBath 1460 non-null int64\n",
|
||
"FullBath 1460 non-null int64\n",
|
||
"HalfBath 1460 non-null int64\n",
|
||
"BedroomAbvGr 1460 non-null int64\n",
|
||
"KitchenAbvGr 1460 non-null int64\n",
|
||
"KitchenQual 1460 non-null object\n",
|
||
"TotRmsAbvGrd 1460 non-null int64\n",
|
||
"Functional 1460 non-null object\n",
|
||
"Fireplaces 1460 non-null int64\n",
|
||
"FireplaceQu 770 non-null object\n",
|
||
"GarageType 1379 non-null object\n",
|
||
"GarageYrBlt 1379 non-null float64\n",
|
||
"GarageFinish 1379 non-null object\n",
|
||
"GarageCars 1460 non-null int64\n",
|
||
"GarageArea 1460 non-null int64\n",
|
||
"GarageQual 1379 non-null object\n",
|
||
"GarageCond 1379 non-null object\n",
|
||
"PavedDrive 1460 non-null object\n",
|
||
"WoodDeckSF 1460 non-null int64\n",
|
||
"OpenPorchSF 1460 non-null int64\n",
|
||
"EnclosedPorch 1460 non-null int64\n",
|
||
"3SsnPorch 1460 non-null int64\n",
|
||
"ScreenPorch 1460 non-null int64\n",
|
||
"PoolArea 1460 non-null int64\n",
|
||
"PoolQC 7 non-null object\n",
|
||
"Fence 281 non-null object\n",
|
||
"MiscFeature 54 non-null object\n",
|
||
"MiscVal 1460 non-null int64\n",
|
||
"MoSold 1460 non-null int64\n",
|
||
"YrSold 1460 non-null int64\n",
|
||
"SaleType 1460 non-null object\n",
|
||
"SaleCondition 1460 non-null object\n",
|
||
"SalePrice 1460 non-null int64\n",
|
||
"dtypes: float64(3), int64(35), object(43)\n",
|
||
"memory usage: 924.0+ KB\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"data.info()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>MSSubClass</th>\n",
|
||
" <th>LotFrontage</th>\n",
|
||
" <th>LotArea</th>\n",
|
||
" <th>OverallQual</th>\n",
|
||
" <th>OverallCond</th>\n",
|
||
" <th>YearBuilt</th>\n",
|
||
" <th>YearRemodAdd</th>\n",
|
||
" <th>MasVnrArea</th>\n",
|
||
" <th>BsmtFinSF1</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>WoodDeckSF</th>\n",
|
||
" <th>OpenPorchSF</th>\n",
|
||
" <th>EnclosedPorch</th>\n",
|
||
" <th>3SsnPorch</th>\n",
|
||
" <th>ScreenPorch</th>\n",
|
||
" <th>PoolArea</th>\n",
|
||
" <th>MiscVal</th>\n",
|
||
" <th>MoSold</th>\n",
|
||
" <th>YrSold</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>count</th>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1201.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1452.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>mean</th>\n",
|
||
" <td>730.500000</td>\n",
|
||
" <td>56.897260</td>\n",
|
||
" <td>70.049958</td>\n",
|
||
" <td>10516.828082</td>\n",
|
||
" <td>6.099315</td>\n",
|
||
" <td>5.575342</td>\n",
|
||
" <td>1971.267808</td>\n",
|
||
" <td>1984.865753</td>\n",
|
||
" <td>103.685262</td>\n",
|
||
" <td>443.639726</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>94.244521</td>\n",
|
||
" <td>46.660274</td>\n",
|
||
" <td>21.954110</td>\n",
|
||
" <td>3.409589</td>\n",
|
||
" <td>15.060959</td>\n",
|
||
" <td>2.758904</td>\n",
|
||
" <td>43.489041</td>\n",
|
||
" <td>6.321918</td>\n",
|
||
" <td>2007.815753</td>\n",
|
||
" <td>180921.195890</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>std</th>\n",
|
||
" <td>421.610009</td>\n",
|
||
" <td>42.300571</td>\n",
|
||
" <td>24.284752</td>\n",
|
||
" <td>9981.264932</td>\n",
|
||
" <td>1.382997</td>\n",
|
||
" <td>1.112799</td>\n",
|
||
" <td>30.202904</td>\n",
|
||
" <td>20.645407</td>\n",
|
||
" <td>181.066207</td>\n",
|
||
" <td>456.098091</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>125.338794</td>\n",
|
||
" <td>66.256028</td>\n",
|
||
" <td>61.119149</td>\n",
|
||
" <td>29.317331</td>\n",
|
||
" <td>55.757415</td>\n",
|
||
" <td>40.177307</td>\n",
|
||
" <td>496.123024</td>\n",
|
||
" <td>2.703626</td>\n",
|
||
" <td>1.328095</td>\n",
|
||
" <td>79442.502883</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>min</th>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>20.000000</td>\n",
|
||
" <td>21.000000</td>\n",
|
||
" <td>1300.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>1872.000000</td>\n",
|
||
" <td>1950.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>2006.000000</td>\n",
|
||
" <td>34900.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>25%</th>\n",
|
||
" <td>365.750000</td>\n",
|
||
" <td>20.000000</td>\n",
|
||
" <td>59.000000</td>\n",
|
||
" <td>7553.500000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>1954.000000</td>\n",
|
||
" <td>1967.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>2007.000000</td>\n",
|
||
" <td>129975.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>50%</th>\n",
|
||
" <td>730.500000</td>\n",
|
||
" <td>50.000000</td>\n",
|
||
" <td>69.000000</td>\n",
|
||
" <td>9478.500000</td>\n",
|
||
" <td>6.000000</td>\n",
|
||
" <td>5.000000</td>\n",
|
||
" <td>1973.000000</td>\n",
|
||
" <td>1994.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>383.500000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>25.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>6.000000</td>\n",
|
||
" <td>2008.000000</td>\n",
|
||
" <td>163000.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>75%</th>\n",
|
||
" <td>1095.250000</td>\n",
|
||
" <td>70.000000</td>\n",
|
||
" <td>80.000000</td>\n",
|
||
" <td>11601.500000</td>\n",
|
||
" <td>7.000000</td>\n",
|
||
" <td>6.000000</td>\n",
|
||
" <td>2000.000000</td>\n",
|
||
" <td>2004.000000</td>\n",
|
||
" <td>166.000000</td>\n",
|
||
" <td>712.250000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>168.000000</td>\n",
|
||
" <td>68.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>8.000000</td>\n",
|
||
" <td>2009.000000</td>\n",
|
||
" <td>214000.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>max</th>\n",
|
||
" <td>1460.000000</td>\n",
|
||
" <td>190.000000</td>\n",
|
||
" <td>313.000000</td>\n",
|
||
" <td>215245.000000</td>\n",
|
||
" <td>10.000000</td>\n",
|
||
" <td>9.000000</td>\n",
|
||
" <td>2010.000000</td>\n",
|
||
" <td>2010.000000</td>\n",
|
||
" <td>1600.000000</td>\n",
|
||
" <td>5644.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>857.000000</td>\n",
|
||
" <td>547.000000</td>\n",
|
||
" <td>552.000000</td>\n",
|
||
" <td>508.000000</td>\n",
|
||
" <td>480.000000</td>\n",
|
||
" <td>738.000000</td>\n",
|
||
" <td>15500.000000</td>\n",
|
||
" <td>12.000000</td>\n",
|
||
" <td>2010.000000</td>\n",
|
||
" <td>755000.000000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>8 rows × 38 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id MSSubClass LotFrontage LotArea OverallQual \\\n",
|
||
"count 1460.000000 1460.000000 1201.000000 1460.000000 1460.000000 \n",
|
||
"mean 730.500000 56.897260 70.049958 10516.828082 6.099315 \n",
|
||
"std 421.610009 42.300571 24.284752 9981.264932 1.382997 \n",
|
||
"min 1.000000 20.000000 21.000000 1300.000000 1.000000 \n",
|
||
"25% 365.750000 20.000000 59.000000 7553.500000 5.000000 \n",
|
||
"50% 730.500000 50.000000 69.000000 9478.500000 6.000000 \n",
|
||
"75% 1095.250000 70.000000 80.000000 11601.500000 7.000000 \n",
|
||
"max 1460.000000 190.000000 313.000000 215245.000000 10.000000 \n",
|
||
"\n",
|
||
" OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 \\\n",
|
||
"count 1460.000000 1460.000000 1460.000000 1452.000000 1460.000000 \n",
|
||
"mean 5.575342 1971.267808 1984.865753 103.685262 443.639726 \n",
|
||
"std 1.112799 30.202904 20.645407 181.066207 456.098091 \n",
|
||
"min 1.000000 1872.000000 1950.000000 0.000000 0.000000 \n",
|
||
"25% 5.000000 1954.000000 1967.000000 0.000000 0.000000 \n",
|
||
"50% 5.000000 1973.000000 1994.000000 0.000000 383.500000 \n",
|
||
"75% 6.000000 2000.000000 2004.000000 166.000000 712.250000 \n",
|
||
"max 9.000000 2010.000000 2010.000000 1600.000000 5644.000000 \n",
|
||
"\n",
|
||
" ... WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch \\\n",
|
||
"count ... 1460.000000 1460.000000 1460.000000 1460.000000 \n",
|
||
"mean ... 94.244521 46.660274 21.954110 3.409589 \n",
|
||
"std ... 125.338794 66.256028 61.119149 29.317331 \n",
|
||
"min ... 0.000000 0.000000 0.000000 0.000000 \n",
|
||
"25% ... 0.000000 0.000000 0.000000 0.000000 \n",
|
||
"50% ... 0.000000 25.000000 0.000000 0.000000 \n",
|
||
"75% ... 168.000000 68.000000 0.000000 0.000000 \n",
|
||
"max ... 857.000000 547.000000 552.000000 508.000000 \n",
|
||
"\n",
|
||
" ScreenPorch PoolArea MiscVal MoSold YrSold \\\n",
|
||
"count 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 \n",
|
||
"mean 15.060959 2.758904 43.489041 6.321918 2007.815753 \n",
|
||
"std 55.757415 40.177307 496.123024 2.703626 1.328095 \n",
|
||
"min 0.000000 0.000000 0.000000 1.000000 2006.000000 \n",
|
||
"25% 0.000000 0.000000 0.000000 5.000000 2007.000000 \n",
|
||
"50% 0.000000 0.000000 0.000000 6.000000 2008.000000 \n",
|
||
"75% 0.000000 0.000000 0.000000 8.000000 2009.000000 \n",
|
||
"max 480.000000 738.000000 15500.000000 12.000000 2010.000000 \n",
|
||
"\n",
|
||
" SalePrice \n",
|
||
"count 1460.000000 \n",
|
||
"mean 180921.195890 \n",
|
||
"std 79442.502883 \n",
|
||
"min 34900.000000 \n",
|
||
"25% 129975.000000 \n",
|
||
"50% 163000.000000 \n",
|
||
"75% 214000.000000 \n",
|
||
"max 755000.000000 \n",
|
||
"\n",
|
||
"[8 rows x 38 columns]"
|
||
]
|
||
},
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"source": [
|
||
"### Dostęp do danych"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "fragment"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Index([u'Id', u'MSSubClass', u'MSZoning', u'LotFrontage', u'LotArea',\n",
|
||
" u'Street', u'Alley', u'LotShape', u'LandContour', u'Utilities',\n",
|
||
" u'LotConfig', u'LandSlope', u'Neighborhood', u'Condition1',\n",
|
||
" u'Condition2', u'BldgType', u'HouseStyle', u'OverallQual',\n",
|
||
" u'OverallCond', u'YearBuilt', u'YearRemodAdd', u'RoofStyle',\n",
|
||
" u'RoofMatl', u'Exterior1st', u'Exterior2nd', u'MasVnrType',\n",
|
||
" u'MasVnrArea', u'ExterQual', u'ExterCond', u'Foundation', u'BsmtQual',\n",
|
||
" u'BsmtCond', u'BsmtExposure', u'BsmtFinType1', u'BsmtFinSF1',\n",
|
||
" u'BsmtFinType2', u'BsmtFinSF2', u'BsmtUnfSF', u'TotalBsmtSF',\n",
|
||
" u'Heating', u'HeatingQC', u'CentralAir', u'Electrical', u'1stFlrSF',\n",
|
||
" u'2ndFlrSF', u'LowQualFinSF', u'GrLivArea', u'BsmtFullBath',\n",
|
||
" u'BsmtHalfBath', u'FullBath', u'HalfBath', u'BedroomAbvGr',\n",
|
||
" u'KitchenAbvGr', u'KitchenQual', u'TotRmsAbvGrd', u'Functional',\n",
|
||
" u'Fireplaces', u'FireplaceQu', u'GarageType', u'GarageYrBlt',\n",
|
||
" u'GarageFinish', u'GarageCars', u'GarageArea', u'GarageQual',\n",
|
||
" u'GarageCond', u'PavedDrive', u'WoodDeckSF', u'OpenPorchSF',\n",
|
||
" u'EnclosedPorch', u'3SsnPorch', u'ScreenPorch', u'PoolArea', u'PoolQC',\n",
|
||
" u'Fence', u'MiscFeature', u'MiscVal', u'MoSold', u'YrSold', u'SaleType',\n",
|
||
" u'SaleCondition', u'SalePrice'],\n",
|
||
" dtype='object')\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(data.columns)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"0 60\n",
|
||
"1 20\n",
|
||
"2 60\n",
|
||
"3 70\n",
|
||
"4 60\n",
|
||
"Name: MSSubClass, dtype: int64\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(data['MSSubClass'].head())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" MSSubClass SalePrice\n",
|
||
"0 60 208500\n",
|
||
"1 20 181500\n",
|
||
"2 60 223500\n",
|
||
"3 70 140000\n",
|
||
"4 60 250000\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(data[['MSSubClass', 'SalePrice']].head())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {
|
||
"slideshow": {
|
||
"slide_type": "slide"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>MSSubClass</th>\n",
|
||
" <th>MSZoning</th>\n",
|
||
" <th>LotFrontage</th>\n",
|
||
" <th>LotArea</th>\n",
|
||
" <th>Street</th>\n",
|
||
" <th>Alley</th>\n",
|
||
" <th>LotShape</th>\n",
|
||
" <th>LandContour</th>\n",
|
||
" <th>Utilities</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PoolArea</th>\n",
|
||
" <th>PoolQC</th>\n",
|
||
" <th>Fence</th>\n",
|
||
" <th>MiscFeature</th>\n",
|
||
" <th>MiscVal</th>\n",
|
||
" <th>MoSold</th>\n",
|
||
" <th>YrSold</th>\n",
|
||
" <th>SaleType</th>\n",
|
||
" <th>SaleCondition</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>65.0</td>\n",
|
||
" <td>8450</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>208500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>9550</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2006</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Abnorml</td>\n",
|
||
" <td>140000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>2 rows × 81 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
|
||
"0 1 60 RL 65.0 8450 Pave NaN Reg \n",
|
||
"3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
|
||
"\n",
|
||
" LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
|
||
"0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"\n",
|
||
" MoSold YrSold SaleType SaleCondition SalePrice \n",
|
||
"0 2 2008 WD Normal 208500 \n",
|
||
"3 2 2006 WD Abnorml 140000 \n",
|
||
"\n",
|
||
"[2 rows x 81 columns]"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.loc[[0,3]]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>MSSubClass</th>\n",
|
||
" <th>MSZoning</th>\n",
|
||
" <th>LotFrontage</th>\n",
|
||
" <th>LotArea</th>\n",
|
||
" <th>Street</th>\n",
|
||
" <th>Alley</th>\n",
|
||
" <th>LotShape</th>\n",
|
||
" <th>LandContour</th>\n",
|
||
" <th>Utilities</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PoolArea</th>\n",
|
||
" <th>PoolQC</th>\n",
|
||
" <th>Fence</th>\n",
|
||
" <th>MiscFeature</th>\n",
|
||
" <th>MiscVal</th>\n",
|
||
" <th>MoSold</th>\n",
|
||
" <th>YrSold</th>\n",
|
||
" <th>SaleType</th>\n",
|
||
" <th>SaleCondition</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>65.0</td>\n",
|
||
" <td>8450</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>208500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>9600</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2007</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>181500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>68.0</td>\n",
|
||
" <td>11250</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>223500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>9550</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2006</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Abnorml</td>\n",
|
||
" <td>140000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>84.0</td>\n",
|
||
" <td>14260</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>250000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>85.0</td>\n",
|
||
" <td>14115</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>MnPrv</td>\n",
|
||
" <td>Shed</td>\n",
|
||
" <td>700</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2009</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>143000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>6 rows × 81 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
|
||
"0 1 60 RL 65.0 8450 Pave NaN Reg \n",
|
||
"1 2 20 RL 80.0 9600 Pave NaN Reg \n",
|
||
"2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
|
||
"3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
|
||
"4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
|
||
"5 6 50 RL 85.0 14115 Pave NaN IR1 \n",
|
||
"\n",
|
||
" LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
|
||
"0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"5 Lvl AllPub ... 0 NaN MnPrv Shed 700 \n",
|
||
"\n",
|
||
" MoSold YrSold SaleType SaleCondition SalePrice \n",
|
||
"0 2 2008 WD Normal 208500 \n",
|
||
"1 5 2007 WD Normal 181500 \n",
|
||
"2 9 2008 WD Normal 223500 \n",
|
||
"3 2 2006 WD Abnorml 140000 \n",
|
||
"4 12 2008 WD Normal 250000 \n",
|
||
"5 10 2009 WD Normal 143000 \n",
|
||
"\n",
|
||
"[6 rows x 81 columns]"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.loc[0:5]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>MSSubClass</th>\n",
|
||
" <th>MSZoning</th>\n",
|
||
" <th>LotFrontage</th>\n",
|
||
" <th>LotArea</th>\n",
|
||
" <th>Street</th>\n",
|
||
" <th>Alley</th>\n",
|
||
" <th>LotShape</th>\n",
|
||
" <th>LandContour</th>\n",
|
||
" <th>Utilities</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PoolArea</th>\n",
|
||
" <th>PoolQC</th>\n",
|
||
" <th>Fence</th>\n",
|
||
" <th>MiscFeature</th>\n",
|
||
" <th>MiscVal</th>\n",
|
||
" <th>MoSold</th>\n",
|
||
" <th>YrSold</th>\n",
|
||
" <th>SaleType</th>\n",
|
||
" <th>SaleCondition</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>65.0</td>\n",
|
||
" <td>8450</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>208500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>9600</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2007</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>181500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>68.0</td>\n",
|
||
" <td>11250</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>223500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>9550</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2006</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Abnorml</td>\n",
|
||
" <td>140000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>84.0</td>\n",
|
||
" <td>14260</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>250000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 81 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
|
||
"0 1 60 RL 65.0 8450 Pave NaN Reg \n",
|
||
"1 2 20 RL 80.0 9600 Pave NaN Reg \n",
|
||
"2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
|
||
"3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
|
||
"4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
|
||
"\n",
|
||
" LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
|
||
"0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"\n",
|
||
" MoSold YrSold SaleType SaleCondition SalePrice \n",
|
||
"0 2 2008 WD Normal 208500 \n",
|
||
"1 5 2007 WD Normal 181500 \n",
|
||
"2 9 2008 WD Normal 223500 \n",
|
||
"3 2 2006 WD Abnorml 140000 \n",
|
||
"4 12 2008 WD Normal 250000 \n",
|
||
"\n",
|
||
"[5 rows x 81 columns]"
|
||
]
|
||
},
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data[data['MSZoning'] == 'RL'].head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>MSSubClass</th>\n",
|
||
" <th>MSZoning</th>\n",
|
||
" <th>LotFrontage</th>\n",
|
||
" <th>LotArea</th>\n",
|
||
" <th>Street</th>\n",
|
||
" <th>Alley</th>\n",
|
||
" <th>LotShape</th>\n",
|
||
" <th>LandContour</th>\n",
|
||
" <th>Utilities</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PoolArea</th>\n",
|
||
" <th>PoolQC</th>\n",
|
||
" <th>Fence</th>\n",
|
||
" <th>MiscFeature</th>\n",
|
||
" <th>MiscVal</th>\n",
|
||
" <th>MoSold</th>\n",
|
||
" <th>YrSold</th>\n",
|
||
" <th>SaleType</th>\n",
|
||
" <th>SaleCondition</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>1</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>65.0</td>\n",
|
||
" <td>8450</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>208500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>9600</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2007</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>181500</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>7</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>75.0</td>\n",
|
||
" <td>10084</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>8</td>\n",
|
||
" <td>2007</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>307000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>10</td>\n",
|
||
" <td>190</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>7420</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>118000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>10</th>\n",
|
||
" <td>11</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>70.0</td>\n",
|
||
" <td>11200</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>129500</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 81 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
|
||
"0 1 60 RL 65.0 8450 Pave NaN Reg \n",
|
||
"1 2 20 RL 80.0 9600 Pave NaN Reg \n",
|
||
"6 7 20 RL 75.0 10084 Pave NaN Reg \n",
|
||
"9 10 190 RL 50.0 7420 Pave NaN Reg \n",
|
||
"10 11 20 RL 70.0 11200 Pave NaN Reg \n",
|
||
"\n",
|
||
" LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
|
||
"0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"6 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"9 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"10 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
|
||
"\n",
|
||
" MoSold YrSold SaleType SaleCondition SalePrice \n",
|
||
"0 2 2008 WD Normal 208500 \n",
|
||
"1 5 2007 WD Normal 181500 \n",
|
||
"6 8 2007 WD Normal 307000 \n",
|
||
"9 1 2008 WD Normal 118000 \n",
|
||
"10 2 2008 WD Normal 129500 \n",
|
||
"\n",
|
||
"[5 rows x 81 columns]"
|
||
]
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data[(data['MSZoning'] == 'RL') & (data['LotShape'] == 'Reg')].head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ceny = data['SalePrice']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"180921.19589041095"
|
||
]
|
||
},
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ceny.mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"755000"
|
||
]
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ceny.max()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"'SalePrice'"
|
||
]
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"ceny.name"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"('Plus vat:', 0 256455.00\n",
|
||
"1 223245.00\n",
|
||
"2 274905.00\n",
|
||
"3 172200.00\n",
|
||
"4 307500.00\n",
|
||
"5 175890.00\n",
|
||
"6 377610.00\n",
|
||
"7 246000.00\n",
|
||
"8 159777.00\n",
|
||
"9 145140.00\n",
|
||
"10 159285.00\n",
|
||
"11 424350.00\n",
|
||
"12 177120.00\n",
|
||
"13 343785.00\n",
|
||
"14 193110.00\n",
|
||
"15 162360.00\n",
|
||
"16 183270.00\n",
|
||
"17 110700.00\n",
|
||
"18 195570.00\n",
|
||
"19 170970.00\n",
|
||
"20 400119.00\n",
|
||
"21 171462.00\n",
|
||
"22 282900.00\n",
|
||
"23 159777.00\n",
|
||
"24 189420.00\n",
|
||
"25 315249.00\n",
|
||
"26 165804.00\n",
|
||
"27 376380.00\n",
|
||
"28 255225.00\n",
|
||
"29 84255.00\n",
|
||
" ... \n",
|
||
"1430 236332.20\n",
|
||
"1431 176812.50\n",
|
||
"1432 79335.00\n",
|
||
"1433 229395.00\n",
|
||
"1434 196800.00\n",
|
||
"1435 214020.00\n",
|
||
"1436 148215.00\n",
|
||
"1437 485378.91\n",
|
||
"1438 184131.00\n",
|
||
"1439 242310.00\n",
|
||
"1440 234930.00\n",
|
||
"1441 183639.00\n",
|
||
"1442 381300.00\n",
|
||
"1443 148830.00\n",
|
||
"1444 220908.00\n",
|
||
"1445 158670.00\n",
|
||
"1446 194217.00\n",
|
||
"1447 295200.00\n",
|
||
"1448 137760.00\n",
|
||
"1449 113160.00\n",
|
||
"1450 167280.00\n",
|
||
"1451 353120.70\n",
|
||
"1452 178350.00\n",
|
||
"1453 103935.00\n",
|
||
"1454 227550.00\n",
|
||
"1455 215250.00\n",
|
||
"1456 258300.00\n",
|
||
"1457 327795.00\n",
|
||
"1458 174813.75\n",
|
||
"1459 181425.00\n",
|
||
"Name: SalePrice, Length: 1460, dtype: float64)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(\"Plus vat:\", ceny * 1.23)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array(['RL', 'RM', 'C (all)', 'FV', 'RH'], dtype=object)"
|
||
]
|
||
},
|
||
"execution_count": 35,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.MSZoning.unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"RL 1151\n",
|
||
"RM 218\n",
|
||
"FV 65\n",
|
||
"RH 16\n",
|
||
"C (all) 10\n",
|
||
"Name: MSZoning, dtype: int64"
|
||
]
|
||
},
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.MSZoning.value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"data['nowa'] = ceny * 1.23"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>MSSubClass</th>\n",
|
||
" <th>MSZoning</th>\n",
|
||
" <th>LotFrontage</th>\n",
|
||
" <th>Street</th>\n",
|
||
" <th>Alley</th>\n",
|
||
" <th>LotShape</th>\n",
|
||
" <th>LandContour</th>\n",
|
||
" <th>Utilities</th>\n",
|
||
" <th>LotConfig</th>\n",
|
||
" <th>LandSlope</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PoolQC</th>\n",
|
||
" <th>Fence</th>\n",
|
||
" <th>MiscFeature</th>\n",
|
||
" <th>MiscVal</th>\n",
|
||
" <th>MoSold</th>\n",
|
||
" <th>YrSold</th>\n",
|
||
" <th>SaleType</th>\n",
|
||
" <th>SaleCondition</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" <th>nowa</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>65.0</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>Inside</td>\n",
|
||
" <td>Gtl</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>208500</td>\n",
|
||
" <td>256455.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>20</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>FR2</td>\n",
|
||
" <td>Gtl</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2007</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>181500</td>\n",
|
||
" <td>223245.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>68.0</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>Inside</td>\n",
|
||
" <td>Gtl</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>223500</td>\n",
|
||
" <td>274905.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>70</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>Corner</td>\n",
|
||
" <td>Gtl</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2006</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Abnorml</td>\n",
|
||
" <td>140000</td>\n",
|
||
" <td>172200.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>84.0</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>FR2</td>\n",
|
||
" <td>Gtl</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>250000</td>\n",
|
||
" <td>307500.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 80 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" MSSubClass MSZoning LotFrontage Street Alley LotShape LandContour \\\n",
|
||
"0 60 RL 65.0 Pave NaN Reg Lvl \n",
|
||
"1 20 RL 80.0 Pave NaN Reg Lvl \n",
|
||
"2 60 RL 68.0 Pave NaN IR1 Lvl \n",
|
||
"3 70 RL 60.0 Pave NaN IR1 Lvl \n",
|
||
"4 60 RL 84.0 Pave NaN IR1 Lvl \n",
|
||
"\n",
|
||
" Utilities LotConfig LandSlope ... PoolQC Fence MiscFeature MiscVal \\\n",
|
||
"0 AllPub Inside Gtl ... NaN NaN NaN 0 \n",
|
||
"1 AllPub FR2 Gtl ... NaN NaN NaN 0 \n",
|
||
"2 AllPub Inside Gtl ... NaN NaN NaN 0 \n",
|
||
"3 AllPub Corner Gtl ... NaN NaN NaN 0 \n",
|
||
"4 AllPub FR2 Gtl ... NaN NaN NaN 0 \n",
|
||
"\n",
|
||
" MoSold YrSold SaleType SaleCondition SalePrice nowa \n",
|
||
"0 2 2008 WD Normal 208500 256455.0 \n",
|
||
"1 5 2007 WD Normal 181500 223245.0 \n",
|
||
"2 9 2008 WD Normal 223500 274905.0 \n",
|
||
"3 2 2006 WD Abnorml 140000 172200.0 \n",
|
||
"4 12 2008 WD Normal 250000 307500.0 \n",
|
||
"\n",
|
||
"[5 rows x 80 columns]"
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.drop('LotArea', axis=1)\n",
|
||
"data.drop(['Id', 'LotArea'], axis=1).head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Id</th>\n",
|
||
" <th>MSSubClass</th>\n",
|
||
" <th>MSZoning</th>\n",
|
||
" <th>LotFrontage</th>\n",
|
||
" <th>LotArea</th>\n",
|
||
" <th>Street</th>\n",
|
||
" <th>Alley</th>\n",
|
||
" <th>LotShape</th>\n",
|
||
" <th>LandContour</th>\n",
|
||
" <th>Utilities</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>PoolQC</th>\n",
|
||
" <th>Fence</th>\n",
|
||
" <th>MiscFeature</th>\n",
|
||
" <th>MiscVal</th>\n",
|
||
" <th>MoSold</th>\n",
|
||
" <th>YrSold</th>\n",
|
||
" <th>SaleType</th>\n",
|
||
" <th>SaleCondition</th>\n",
|
||
" <th>SalePrice</th>\n",
|
||
" <th>nowa</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>2</td>\n",
|
||
" <td>20</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" <td>9600</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>Reg</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>2007</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>181500</td>\n",
|
||
" <td>223245.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>3</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>68.0</td>\n",
|
||
" <td>11250</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>9</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>223500</td>\n",
|
||
" <td>274905.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>4</td>\n",
|
||
" <td>70</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>60.0</td>\n",
|
||
" <td>9550</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>2006</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Abnorml</td>\n",
|
||
" <td>140000</td>\n",
|
||
" <td>172200.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>5</td>\n",
|
||
" <td>60</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>84.0</td>\n",
|
||
" <td>14260</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>0</td>\n",
|
||
" <td>12</td>\n",
|
||
" <td>2008</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>250000</td>\n",
|
||
" <td>307500.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>6</td>\n",
|
||
" <td>50</td>\n",
|
||
" <td>RL</td>\n",
|
||
" <td>85.0</td>\n",
|
||
" <td>14115</td>\n",
|
||
" <td>Pave</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>IR1</td>\n",
|
||
" <td>Lvl</td>\n",
|
||
" <td>AllPub</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>MnPrv</td>\n",
|
||
" <td>Shed</td>\n",
|
||
" <td>700</td>\n",
|
||
" <td>10</td>\n",
|
||
" <td>2009</td>\n",
|
||
" <td>WD</td>\n",
|
||
" <td>Normal</td>\n",
|
||
" <td>143000</td>\n",
|
||
" <td>175890.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 82 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
|
||
"1 2 20 RL 80.0 9600 Pave NaN Reg \n",
|
||
"2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
|
||
"3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
|
||
"4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
|
||
"5 6 50 RL 85.0 14115 Pave NaN IR1 \n",
|
||
"\n",
|
||
" LandContour Utilities ... PoolQC Fence MiscFeature MiscVal MoSold \\\n",
|
||
"1 Lvl AllPub ... NaN NaN NaN 0 5 \n",
|
||
"2 Lvl AllPub ... NaN NaN NaN 0 9 \n",
|
||
"3 Lvl AllPub ... NaN NaN NaN 0 2 \n",
|
||
"4 Lvl AllPub ... NaN NaN NaN 0 12 \n",
|
||
"5 Lvl AllPub ... NaN MnPrv Shed 700 10 \n",
|
||
"\n",
|
||
" YrSold SaleType SaleCondition SalePrice nowa \n",
|
||
"1 2007 WD Normal 181500 223245.0 \n",
|
||
"2 2008 WD Normal 223500 274905.0 \n",
|
||
"3 2006 WD Abnorml 140000 172200.0 \n",
|
||
"4 2008 WD Normal 250000 307500.0 \n",
|
||
"5 2009 WD Normal 143000 175890.0 \n",
|
||
"\n",
|
||
"[5 rows x 82 columns]"
|
||
]
|
||
},
|
||
"execution_count": 44,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data.drop(0).head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"celltoolbar": "Slideshow",
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|