diff --git a/labs05/pandas_wprowadzenie.ipynb b/labs05/pandas_wprowadzenie.ipynb
new file mode 100644
index 0000000..a92bc13
--- /dev/null
+++ b/labs05/pandas_wprowadzenie.ipynb
@@ -0,0 +1,2309 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "# Analiza danych w Pythonie\n",
+ "\n",
+ "### Tomasz Dwojak\n",
+ "\n",
+ "### 3 czerwca 2018"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Analiza danych:\n",
+ "\n",
+ " * R\n",
+ " * Python"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Ekosystem Pythona\n",
+ "\n",
+ " * pandas: ramka danych\n",
+ " * sklearn: modele ML\n",
+ " * numpy: obliczenia\n",
+ " * matplotlib: wykresy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "skip"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Typy danych\n",
+ "\n",
+ " * Szereg (`pd.Series`)\n",
+ " * Ramka danych (`pd.DataFrame`)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Wczytanie danych"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "fragment"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "data = pd.read_csv(\"./data/iowa.csv.gz\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "2 9 2008 WD Normal 223500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "4 12 2008 WD Normal 250000 \n",
+ "\n",
+ "[5 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(1460, 81)\n"
+ ]
+ }
+ ],
+ "source": [
+ "shape = data.shape\n",
+ "rows = shape[0]\n",
+ "cols = shape[1]\n",
+ "\n",
+ "print(rows, cols)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'pandas.core.frame.DataFrame'>\n",
+ "RangeIndex: 1460 entries, 0 to 1459\n",
+ "Data columns (total 81 columns):\n",
+ "Id 1460 non-null int64\n",
+ "MSSubClass 1460 non-null int64\n",
+ "MSZoning 1460 non-null object\n",
+ "LotFrontage 1201 non-null float64\n",
+ "LotArea 1460 non-null int64\n",
+ "Street 1460 non-null object\n",
+ "Alley 91 non-null object\n",
+ "LotShape 1460 non-null object\n",
+ "LandContour 1460 non-null object\n",
+ "Utilities 1460 non-null object\n",
+ "LotConfig 1460 non-null object\n",
+ "LandSlope 1460 non-null object\n",
+ "Neighborhood 1460 non-null object\n",
+ "Condition1 1460 non-null object\n",
+ "Condition2 1460 non-null object\n",
+ "BldgType 1460 non-null object\n",
+ "HouseStyle 1460 non-null object\n",
+ "OverallQual 1460 non-null int64\n",
+ "OverallCond 1460 non-null int64\n",
+ "YearBuilt 1460 non-null int64\n",
+ "YearRemodAdd 1460 non-null int64\n",
+ "RoofStyle 1460 non-null object\n",
+ "RoofMatl 1460 non-null object\n",
+ "Exterior1st 1460 non-null object\n",
+ "Exterior2nd 1460 non-null object\n",
+ "MasVnrType 1452 non-null object\n",
+ "MasVnrArea 1452 non-null float64\n",
+ "ExterQual 1460 non-null object\n",
+ "ExterCond 1460 non-null object\n",
+ "Foundation 1460 non-null object\n",
+ "BsmtQual 1423 non-null object\n",
+ "BsmtCond 1423 non-null object\n",
+ "BsmtExposure 1422 non-null object\n",
+ "BsmtFinType1 1423 non-null object\n",
+ "BsmtFinSF1 1460 non-null int64\n",
+ "BsmtFinType2 1422 non-null object\n",
+ "BsmtFinSF2 1460 non-null int64\n",
+ "BsmtUnfSF 1460 non-null int64\n",
+ "TotalBsmtSF 1460 non-null int64\n",
+ "Heating 1460 non-null object\n",
+ "HeatingQC 1460 non-null object\n",
+ "CentralAir 1460 non-null object\n",
+ "Electrical 1459 non-null object\n",
+ "1stFlrSF 1460 non-null int64\n",
+ "2ndFlrSF 1460 non-null int64\n",
+ "LowQualFinSF 1460 non-null int64\n",
+ "GrLivArea 1460 non-null int64\n",
+ "BsmtFullBath 1460 non-null int64\n",
+ "BsmtHalfBath 1460 non-null int64\n",
+ "FullBath 1460 non-null int64\n",
+ "HalfBath 1460 non-null int64\n",
+ "BedroomAbvGr 1460 non-null int64\n",
+ "KitchenAbvGr 1460 non-null int64\n",
+ "KitchenQual 1460 non-null object\n",
+ "TotRmsAbvGrd 1460 non-null int64\n",
+ "Functional 1460 non-null object\n",
+ "Fireplaces 1460 non-null int64\n",
+ "FireplaceQu 770 non-null object\n",
+ "GarageType 1379 non-null object\n",
+ "GarageYrBlt 1379 non-null float64\n",
+ "GarageFinish 1379 non-null object\n",
+ "GarageCars 1460 non-null int64\n",
+ "GarageArea 1460 non-null int64\n",
+ "GarageQual 1379 non-null object\n",
+ "GarageCond 1379 non-null object\n",
+ "PavedDrive 1460 non-null object\n",
+ "WoodDeckSF 1460 non-null int64\n",
+ "OpenPorchSF 1460 non-null int64\n",
+ "EnclosedPorch 1460 non-null int64\n",
+ "3SsnPorch 1460 non-null int64\n",
+ "ScreenPorch 1460 non-null int64\n",
+ "PoolArea 1460 non-null int64\n",
+ "PoolQC 7 non-null object\n",
+ "Fence 281 non-null object\n",
+ "MiscFeature 54 non-null object\n",
+ "MiscVal 1460 non-null int64\n",
+ "MoSold 1460 non-null int64\n",
+ "YrSold 1460 non-null int64\n",
+ "SaleType 1460 non-null object\n",
+ "SaleCondition 1460 non-null object\n",
+ "SalePrice 1460 non-null int64\n",
+ "dtypes: float64(3), int64(35), object(43)\n",
+ "memory usage: 924.0+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "data.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " OverallQual | \n",
+ " OverallCond | \n",
+ " YearBuilt | \n",
+ " YearRemodAdd | \n",
+ " MasVnrArea | \n",
+ " BsmtFinSF1 | \n",
+ " ... | \n",
+ " WoodDeckSF | \n",
+ " OpenPorchSF | \n",
+ " EnclosedPorch | \n",
+ " 3SsnPorch | \n",
+ " ScreenPorch | \n",
+ " PoolArea | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1201.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1452.000000 | \n",
+ " 1460.000000 | \n",
+ " ... | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 730.500000 | \n",
+ " 56.897260 | \n",
+ " 70.049958 | \n",
+ " 10516.828082 | \n",
+ " 6.099315 | \n",
+ " 5.575342 | \n",
+ " 1971.267808 | \n",
+ " 1984.865753 | \n",
+ " 103.685262 | \n",
+ " 443.639726 | \n",
+ " ... | \n",
+ " 94.244521 | \n",
+ " 46.660274 | \n",
+ " 21.954110 | \n",
+ " 3.409589 | \n",
+ " 15.060959 | \n",
+ " 2.758904 | \n",
+ " 43.489041 | \n",
+ " 6.321918 | \n",
+ " 2007.815753 | \n",
+ " 180921.195890 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 421.610009 | \n",
+ " 42.300571 | \n",
+ " 24.284752 | \n",
+ " 9981.264932 | \n",
+ " 1.382997 | \n",
+ " 1.112799 | \n",
+ " 30.202904 | \n",
+ " 20.645407 | \n",
+ " 181.066207 | \n",
+ " 456.098091 | \n",
+ " ... | \n",
+ " 125.338794 | \n",
+ " 66.256028 | \n",
+ " 61.119149 | \n",
+ " 29.317331 | \n",
+ " 55.757415 | \n",
+ " 40.177307 | \n",
+ " 496.123024 | \n",
+ " 2.703626 | \n",
+ " 1.328095 | \n",
+ " 79442.502883 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 1.000000 | \n",
+ " 20.000000 | \n",
+ " 21.000000 | \n",
+ " 1300.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1872.000000 | \n",
+ " 1950.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 2006.000000 | \n",
+ " 34900.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 365.750000 | \n",
+ " 20.000000 | \n",
+ " 59.000000 | \n",
+ " 7553.500000 | \n",
+ " 5.000000 | \n",
+ " 5.000000 | \n",
+ " 1954.000000 | \n",
+ " 1967.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 5.000000 | \n",
+ " 2007.000000 | \n",
+ " 129975.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 730.500000 | \n",
+ " 50.000000 | \n",
+ " 69.000000 | \n",
+ " 9478.500000 | \n",
+ " 6.000000 | \n",
+ " 5.000000 | \n",
+ " 1973.000000 | \n",
+ " 1994.000000 | \n",
+ " 0.000000 | \n",
+ " 383.500000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 25.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.000000 | \n",
+ " 2008.000000 | \n",
+ " 163000.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 1095.250000 | \n",
+ " 70.000000 | \n",
+ " 80.000000 | \n",
+ " 11601.500000 | \n",
+ " 7.000000 | \n",
+ " 6.000000 | \n",
+ " 2000.000000 | \n",
+ " 2004.000000 | \n",
+ " 166.000000 | \n",
+ " 712.250000 | \n",
+ " ... | \n",
+ " 168.000000 | \n",
+ " 68.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 8.000000 | \n",
+ " 2009.000000 | \n",
+ " 214000.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 1460.000000 | \n",
+ " 190.000000 | \n",
+ " 313.000000 | \n",
+ " 215245.000000 | \n",
+ " 10.000000 | \n",
+ " 9.000000 | \n",
+ " 2010.000000 | \n",
+ " 2010.000000 | \n",
+ " 1600.000000 | \n",
+ " 5644.000000 | \n",
+ " ... | \n",
+ " 857.000000 | \n",
+ " 547.000000 | \n",
+ " 552.000000 | \n",
+ " 508.000000 | \n",
+ " 480.000000 | \n",
+ " 738.000000 | \n",
+ " 15500.000000 | \n",
+ " 12.000000 | \n",
+ " 2010.000000 | \n",
+ " 755000.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8 rows × 38 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass LotFrontage LotArea OverallQual \\\n",
+ "count 1460.000000 1460.000000 1201.000000 1460.000000 1460.000000 \n",
+ "mean 730.500000 56.897260 70.049958 10516.828082 6.099315 \n",
+ "std 421.610009 42.300571 24.284752 9981.264932 1.382997 \n",
+ "min 1.000000 20.000000 21.000000 1300.000000 1.000000 \n",
+ "25% 365.750000 20.000000 59.000000 7553.500000 5.000000 \n",
+ "50% 730.500000 50.000000 69.000000 9478.500000 6.000000 \n",
+ "75% 1095.250000 70.000000 80.000000 11601.500000 7.000000 \n",
+ "max 1460.000000 190.000000 313.000000 215245.000000 10.000000 \n",
+ "\n",
+ " OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 \\\n",
+ "count 1460.000000 1460.000000 1460.000000 1452.000000 1460.000000 \n",
+ "mean 5.575342 1971.267808 1984.865753 103.685262 443.639726 \n",
+ "std 1.112799 30.202904 20.645407 181.066207 456.098091 \n",
+ "min 1.000000 1872.000000 1950.000000 0.000000 0.000000 \n",
+ "25% 5.000000 1954.000000 1967.000000 0.000000 0.000000 \n",
+ "50% 5.000000 1973.000000 1994.000000 0.000000 383.500000 \n",
+ "75% 6.000000 2000.000000 2004.000000 166.000000 712.250000 \n",
+ "max 9.000000 2010.000000 2010.000000 1600.000000 5644.000000 \n",
+ "\n",
+ " ... WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch \\\n",
+ "count ... 1460.000000 1460.000000 1460.000000 1460.000000 \n",
+ "mean ... 94.244521 46.660274 21.954110 3.409589 \n",
+ "std ... 125.338794 66.256028 61.119149 29.317331 \n",
+ "min ... 0.000000 0.000000 0.000000 0.000000 \n",
+ "25% ... 0.000000 0.000000 0.000000 0.000000 \n",
+ "50% ... 0.000000 25.000000 0.000000 0.000000 \n",
+ "75% ... 168.000000 68.000000 0.000000 0.000000 \n",
+ "max ... 857.000000 547.000000 552.000000 508.000000 \n",
+ "\n",
+ " ScreenPorch PoolArea MiscVal MoSold YrSold \\\n",
+ "count 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 \n",
+ "mean 15.060959 2.758904 43.489041 6.321918 2007.815753 \n",
+ "std 55.757415 40.177307 496.123024 2.703626 1.328095 \n",
+ "min 0.000000 0.000000 0.000000 1.000000 2006.000000 \n",
+ "25% 0.000000 0.000000 0.000000 5.000000 2007.000000 \n",
+ "50% 0.000000 0.000000 0.000000 6.000000 2008.000000 \n",
+ "75% 0.000000 0.000000 0.000000 8.000000 2009.000000 \n",
+ "max 480.000000 738.000000 15500.000000 12.000000 2010.000000 \n",
+ "\n",
+ " SalePrice \n",
+ "count 1460.000000 \n",
+ "mean 180921.195890 \n",
+ "std 79442.502883 \n",
+ "min 34900.000000 \n",
+ "25% 129975.000000 \n",
+ "50% 163000.000000 \n",
+ "75% 214000.000000 \n",
+ "max 755000.000000 \n",
+ "\n",
+ "[8 rows x 38 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Dostęp do danych"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "fragment"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Index([u'Id', u'MSSubClass', u'MSZoning', u'LotFrontage', u'LotArea',\n",
+ " u'Street', u'Alley', u'LotShape', u'LandContour', u'Utilities',\n",
+ " u'LotConfig', u'LandSlope', u'Neighborhood', u'Condition1',\n",
+ " u'Condition2', u'BldgType', u'HouseStyle', u'OverallQual',\n",
+ " u'OverallCond', u'YearBuilt', u'YearRemodAdd', u'RoofStyle',\n",
+ " u'RoofMatl', u'Exterior1st', u'Exterior2nd', u'MasVnrType',\n",
+ " u'MasVnrArea', u'ExterQual', u'ExterCond', u'Foundation', u'BsmtQual',\n",
+ " u'BsmtCond', u'BsmtExposure', u'BsmtFinType1', u'BsmtFinSF1',\n",
+ " u'BsmtFinType2', u'BsmtFinSF2', u'BsmtUnfSF', u'TotalBsmtSF',\n",
+ " u'Heating', u'HeatingQC', u'CentralAir', u'Electrical', u'1stFlrSF',\n",
+ " u'2ndFlrSF', u'LowQualFinSF', u'GrLivArea', u'BsmtFullBath',\n",
+ " u'BsmtHalfBath', u'FullBath', u'HalfBath', u'BedroomAbvGr',\n",
+ " u'KitchenAbvGr', u'KitchenQual', u'TotRmsAbvGrd', u'Functional',\n",
+ " u'Fireplaces', u'FireplaceQu', u'GarageType', u'GarageYrBlt',\n",
+ " u'GarageFinish', u'GarageCars', u'GarageArea', u'GarageQual',\n",
+ " u'GarageCond', u'PavedDrive', u'WoodDeckSF', u'OpenPorchSF',\n",
+ " u'EnclosedPorch', u'3SsnPorch', u'ScreenPorch', u'PoolArea', u'PoolQC',\n",
+ " u'Fence', u'MiscFeature', u'MiscVal', u'MoSold', u'YrSold', u'SaleType',\n",
+ " u'SaleCondition', u'SalePrice'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(data.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 60\n",
+ "1 20\n",
+ "2 60\n",
+ "3 70\n",
+ "4 60\n",
+ "Name: MSSubClass, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(data['MSSubClass'].head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " MSSubClass SalePrice\n",
+ "0 60 208500\n",
+ "1 20 181500\n",
+ "2 60 223500\n",
+ "3 70 140000\n",
+ "4 60 250000\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(data[['MSSubClass', 'SalePrice']].head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "\n",
+ "[2 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[[0,3]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 50 | \n",
+ " RL | \n",
+ " 85.0 | \n",
+ " 14115 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " MnPrv | \n",
+ " Shed | \n",
+ " 700 | \n",
+ " 10 | \n",
+ " 2009 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 143000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "5 6 50 RL 85.0 14115 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "5 Lvl AllPub ... 0 NaN MnPrv Shed 700 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "2 9 2008 WD Normal 223500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "4 12 2008 WD Normal 250000 \n",
+ "5 10 2009 WD Normal 143000 \n",
+ "\n",
+ "[6 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[0:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "2 9 2008 WD Normal 223500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "4 12 2008 WD Normal 250000 \n",
+ "\n",
+ "[5 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[data['MSZoning'] == 'RL'].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 75.0 | \n",
+ " 10084 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 307000 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 10 | \n",
+ " 190 | \n",
+ " RL | \n",
+ " 50.0 | \n",
+ " 7420 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 118000 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 11 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 70.0 | \n",
+ " 11200 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 129500 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "6 7 20 RL 75.0 10084 Pave NaN Reg \n",
+ "9 10 190 RL 50.0 7420 Pave NaN Reg \n",
+ "10 11 20 RL 70.0 11200 Pave NaN Reg \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "6 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "9 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "10 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "6 8 2007 WD Normal 307000 \n",
+ "9 1 2008 WD Normal 118000 \n",
+ "10 2 2008 WD Normal 129500 \n",
+ "\n",
+ "[5 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[(data['MSZoning'] == 'RL') & (data['LotShape'] == 'Reg')].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ceny = data['SalePrice']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "180921.19589041095"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ceny.mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "755000"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ceny.max()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'SalePrice'"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ceny.name"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0    256455.0\n",
+ "1    223245.0\n",
+ "2    274905.0\n",
+ "3    172200.0\n",
+ "4    307500.0\n",
+ "Name: SalePrice, dtype: float64"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Prices multiplied by 1.23 (VAT); head() keeps the output to the first five rows\n",
+ "# and the bare expression is rendered by the notebook instead of going through print().\n",
+ "(ceny * 1.23).head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['RL', 'RM', 'C (all)', 'FV', 'RH'], dtype=object)"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Distinct values found in the MSZoning column\n",
+ "data['MSZoning'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RL 1151\n",
+ "RM 218\n",
+ "FV 65\n",
+ "RH 16\n",
+ "C (all) 10\n",
+ "Name: MSZoning, dtype: int64"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Number of rows per MSZoning value, most frequent first\n",
+ "data['MSZoning'].value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data['nowa'] = ceny * 1.23"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " LotConfig | \n",
+ " LandSlope | \n",
+ " ... | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ " nowa | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ " 256455.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " FR2 | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ " 223245.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ " 274905.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Corner | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ " 172200.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " FR2 | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ " 307500.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 80 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MSSubClass MSZoning LotFrontage Street Alley LotShape LandContour \\\n",
+ "0 60 RL 65.0 Pave NaN Reg Lvl \n",
+ "1 20 RL 80.0 Pave NaN Reg Lvl \n",
+ "2 60 RL 68.0 Pave NaN IR1 Lvl \n",
+ "3 70 RL 60.0 Pave NaN IR1 Lvl \n",
+ "4 60 RL 84.0 Pave NaN IR1 Lvl \n",
+ "\n",
+ " Utilities LotConfig LandSlope ... PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 AllPub Inside Gtl ... NaN NaN NaN 0 \n",
+ "1 AllPub FR2 Gtl ... NaN NaN NaN 0 \n",
+ "2 AllPub Inside Gtl ... NaN NaN NaN 0 \n",
+ "3 AllPub Corner Gtl ... NaN NaN NaN 0 \n",
+ "4 AllPub FR2 Gtl ... NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice nowa \n",
+ "0 2 2008 WD Normal 208500 256455.0 \n",
+ "1 5 2007 WD Normal 181500 223245.0 \n",
+ "2 9 2008 WD Normal 223500 274905.0 \n",
+ "3 2 2006 WD Abnorml 140000 172200.0 \n",
+ "4 12 2008 WD Normal 250000 307500.0 \n",
+ "\n",
+ "[5 rows x 80 columns]"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# drop() returns a new DataFrame and leaves `data` unchanged; with axis=1 it removes\n",
+ "# columns and accepts either a single label ('LotArea') or a list of labels, as here.\n",
+ "data.drop(['Id', 'LotArea'], axis=1).head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ " nowa | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ " 223245.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ " 274905.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ " 172200.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ " 307500.0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 50 | \n",
+ " RL | \n",
+ " 85.0 | \n",
+ " 14115 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " MnPrv | \n",
+ " Shed | \n",
+ " 700 | \n",
+ " 10 | \n",
+ " 2009 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 143000 | \n",
+ " 175890.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 82 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "5 6 50 RL 85.0 14115 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolQC Fence MiscFeature MiscVal MoSold \\\n",
+ "1 Lvl AllPub ... NaN NaN NaN 0 5 \n",
+ "2 Lvl AllPub ... NaN NaN NaN 0 9 \n",
+ "3 Lvl AllPub ... NaN NaN NaN 0 2 \n",
+ "4 Lvl AllPub ... NaN NaN NaN 0 12 \n",
+ "5 Lvl AllPub ... NaN MnPrv Shed 700 10 \n",
+ "\n",
+ " YrSold SaleType SaleCondition SalePrice nowa \n",
+ "1 2007 WD Normal 181500 223245.0 \n",
+ "2 2008 WD Normal 223500 274905.0 \n",
+ "3 2006 WD Abnorml 140000 172200.0 \n",
+ "4 2008 WD Normal 250000 307500.0 \n",
+ "5 2009 WD Normal 143000 175890.0 \n",
+ "\n",
+ "[5 rows x 82 columns]"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Remove the row whose index label is 0 (axis=0 selects rows) and preview the result\n",
+ "data.drop(0, axis=0).head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "celltoolbar": "Slideshow",
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}