diff --git a/labs05/data/iowa.csv.gz b/labs05/data/iowa.csv.gz
new file mode 100644
index 0000000..935b981
Binary files /dev/null and b/labs05/data/iowa.csv.gz differ
diff --git a/labs05/pandas_wprowadzenie.ipynb b/labs05/pandas_wprowadzenie.ipynb
new file mode 100644
index 0000000..a92bc13
--- /dev/null
+++ b/labs05/pandas_wprowadzenie.ipynb
@@ -0,0 +1,2309 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "# Analiza danych w Pythonie\n",
+ "\n",
+ "### Tomasz Dwojak\n",
+ "\n",
+ "### 3 czerwca 2018"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Analiza danych:\n",
+ "\n",
+ " * R\n",
+ " * Python"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Ekosystem Pythona\n",
+ "\n",
+ " * pandas: ramka danych\n",
+ " * sklearn: modele ML\n",
+ " * numpy: obliczenia\n",
+ " * matplotlib: wykresy"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "skip"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Typy danych\n",
+ "\n",
+ " * Szereg (`pd.Series`)\n",
+ " * Ramka danych (`pd.DataFrame`)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Wczytanie danych"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "fragment"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "data = pd.read_csv(\"./data/iowa.csv.gz\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "2 9 2008 WD Normal 223500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "4 12 2008 WD Normal 250000 \n",
+ "\n",
+ "[5 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1460 81\n"
+ ]
+ }
+ ],
+ "source": [
+ "# data.shape is a (rows, columns) tuple; unpack it into named values.\n",
+ "shape = data.shape\n",
+ "rows, cols = shape\n",
+ "\n",
+ "# Format explicitly so the output is identical on Python 2 and 3;\n",
+ "# on a Python 2 kernel print(rows, cols) prints the tuple repr instead.\n",
+ "print(\"{} {}\".format(rows, cols))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "<class 'pandas.core.frame.DataFrame'>\n",
+ "RangeIndex: 1460 entries, 0 to 1459\n",
+ "Data columns (total 81 columns):\n",
+ "Id 1460 non-null int64\n",
+ "MSSubClass 1460 non-null int64\n",
+ "MSZoning 1460 non-null object\n",
+ "LotFrontage 1201 non-null float64\n",
+ "LotArea 1460 non-null int64\n",
+ "Street 1460 non-null object\n",
+ "Alley 91 non-null object\n",
+ "LotShape 1460 non-null object\n",
+ "LandContour 1460 non-null object\n",
+ "Utilities 1460 non-null object\n",
+ "LotConfig 1460 non-null object\n",
+ "LandSlope 1460 non-null object\n",
+ "Neighborhood 1460 non-null object\n",
+ "Condition1 1460 non-null object\n",
+ "Condition2 1460 non-null object\n",
+ "BldgType 1460 non-null object\n",
+ "HouseStyle 1460 non-null object\n",
+ "OverallQual 1460 non-null int64\n",
+ "OverallCond 1460 non-null int64\n",
+ "YearBuilt 1460 non-null int64\n",
+ "YearRemodAdd 1460 non-null int64\n",
+ "RoofStyle 1460 non-null object\n",
+ "RoofMatl 1460 non-null object\n",
+ "Exterior1st 1460 non-null object\n",
+ "Exterior2nd 1460 non-null object\n",
+ "MasVnrType 1452 non-null object\n",
+ "MasVnrArea 1452 non-null float64\n",
+ "ExterQual 1460 non-null object\n",
+ "ExterCond 1460 non-null object\n",
+ "Foundation 1460 non-null object\n",
+ "BsmtQual 1423 non-null object\n",
+ "BsmtCond 1423 non-null object\n",
+ "BsmtExposure 1422 non-null object\n",
+ "BsmtFinType1 1423 non-null object\n",
+ "BsmtFinSF1 1460 non-null int64\n",
+ "BsmtFinType2 1422 non-null object\n",
+ "BsmtFinSF2 1460 non-null int64\n",
+ "BsmtUnfSF 1460 non-null int64\n",
+ "TotalBsmtSF 1460 non-null int64\n",
+ "Heating 1460 non-null object\n",
+ "HeatingQC 1460 non-null object\n",
+ "CentralAir 1460 non-null object\n",
+ "Electrical 1459 non-null object\n",
+ "1stFlrSF 1460 non-null int64\n",
+ "2ndFlrSF 1460 non-null int64\n",
+ "LowQualFinSF 1460 non-null int64\n",
+ "GrLivArea 1460 non-null int64\n",
+ "BsmtFullBath 1460 non-null int64\n",
+ "BsmtHalfBath 1460 non-null int64\n",
+ "FullBath 1460 non-null int64\n",
+ "HalfBath 1460 non-null int64\n",
+ "BedroomAbvGr 1460 non-null int64\n",
+ "KitchenAbvGr 1460 non-null int64\n",
+ "KitchenQual 1460 non-null object\n",
+ "TotRmsAbvGrd 1460 non-null int64\n",
+ "Functional 1460 non-null object\n",
+ "Fireplaces 1460 non-null int64\n",
+ "FireplaceQu 770 non-null object\n",
+ "GarageType 1379 non-null object\n",
+ "GarageYrBlt 1379 non-null float64\n",
+ "GarageFinish 1379 non-null object\n",
+ "GarageCars 1460 non-null int64\n",
+ "GarageArea 1460 non-null int64\n",
+ "GarageQual 1379 non-null object\n",
+ "GarageCond 1379 non-null object\n",
+ "PavedDrive 1460 non-null object\n",
+ "WoodDeckSF 1460 non-null int64\n",
+ "OpenPorchSF 1460 non-null int64\n",
+ "EnclosedPorch 1460 non-null int64\n",
+ "3SsnPorch 1460 non-null int64\n",
+ "ScreenPorch 1460 non-null int64\n",
+ "PoolArea 1460 non-null int64\n",
+ "PoolQC 7 non-null object\n",
+ "Fence 281 non-null object\n",
+ "MiscFeature 54 non-null object\n",
+ "MiscVal 1460 non-null int64\n",
+ "MoSold 1460 non-null int64\n",
+ "YrSold 1460 non-null int64\n",
+ "SaleType 1460 non-null object\n",
+ "SaleCondition 1460 non-null object\n",
+ "SalePrice 1460 non-null int64\n",
+ "dtypes: float64(3), int64(35), object(43)\n",
+ "memory usage: 924.0+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "data.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " OverallQual | \n",
+ " OverallCond | \n",
+ " YearBuilt | \n",
+ " YearRemodAdd | \n",
+ " MasVnrArea | \n",
+ " BsmtFinSF1 | \n",
+ " ... | \n",
+ " WoodDeckSF | \n",
+ " OpenPorchSF | \n",
+ " EnclosedPorch | \n",
+ " 3SsnPorch | \n",
+ " ScreenPorch | \n",
+ " PoolArea | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1201.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1452.000000 | \n",
+ " 1460.000000 | \n",
+ " ... | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ " 1460.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 730.500000 | \n",
+ " 56.897260 | \n",
+ " 70.049958 | \n",
+ " 10516.828082 | \n",
+ " 6.099315 | \n",
+ " 5.575342 | \n",
+ " 1971.267808 | \n",
+ " 1984.865753 | \n",
+ " 103.685262 | \n",
+ " 443.639726 | \n",
+ " ... | \n",
+ " 94.244521 | \n",
+ " 46.660274 | \n",
+ " 21.954110 | \n",
+ " 3.409589 | \n",
+ " 15.060959 | \n",
+ " 2.758904 | \n",
+ " 43.489041 | \n",
+ " 6.321918 | \n",
+ " 2007.815753 | \n",
+ " 180921.195890 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 421.610009 | \n",
+ " 42.300571 | \n",
+ " 24.284752 | \n",
+ " 9981.264932 | \n",
+ " 1.382997 | \n",
+ " 1.112799 | \n",
+ " 30.202904 | \n",
+ " 20.645407 | \n",
+ " 181.066207 | \n",
+ " 456.098091 | \n",
+ " ... | \n",
+ " 125.338794 | \n",
+ " 66.256028 | \n",
+ " 61.119149 | \n",
+ " 29.317331 | \n",
+ " 55.757415 | \n",
+ " 40.177307 | \n",
+ " 496.123024 | \n",
+ " 2.703626 | \n",
+ " 1.328095 | \n",
+ " 79442.502883 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 1.000000 | \n",
+ " 20.000000 | \n",
+ " 21.000000 | \n",
+ " 1300.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1872.000000 | \n",
+ " 1950.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 2006.000000 | \n",
+ " 34900.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 365.750000 | \n",
+ " 20.000000 | \n",
+ " 59.000000 | \n",
+ " 7553.500000 | \n",
+ " 5.000000 | \n",
+ " 5.000000 | \n",
+ " 1954.000000 | \n",
+ " 1967.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 5.000000 | \n",
+ " 2007.000000 | \n",
+ " 129975.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 730.500000 | \n",
+ " 50.000000 | \n",
+ " 69.000000 | \n",
+ " 9478.500000 | \n",
+ " 6.000000 | \n",
+ " 5.000000 | \n",
+ " 1973.000000 | \n",
+ " 1994.000000 | \n",
+ " 0.000000 | \n",
+ " 383.500000 | \n",
+ " ... | \n",
+ " 0.000000 | \n",
+ " 25.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 6.000000 | \n",
+ " 2008.000000 | \n",
+ " 163000.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 1095.250000 | \n",
+ " 70.000000 | \n",
+ " 80.000000 | \n",
+ " 11601.500000 | \n",
+ " 7.000000 | \n",
+ " 6.000000 | \n",
+ " 2000.000000 | \n",
+ " 2004.000000 | \n",
+ " 166.000000 | \n",
+ " 712.250000 | \n",
+ " ... | \n",
+ " 168.000000 | \n",
+ " 68.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 8.000000 | \n",
+ " 2009.000000 | \n",
+ " 214000.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 1460.000000 | \n",
+ " 190.000000 | \n",
+ " 313.000000 | \n",
+ " 215245.000000 | \n",
+ " 10.000000 | \n",
+ " 9.000000 | \n",
+ " 2010.000000 | \n",
+ " 2010.000000 | \n",
+ " 1600.000000 | \n",
+ " 5644.000000 | \n",
+ " ... | \n",
+ " 857.000000 | \n",
+ " 547.000000 | \n",
+ " 552.000000 | \n",
+ " 508.000000 | \n",
+ " 480.000000 | \n",
+ " 738.000000 | \n",
+ " 15500.000000 | \n",
+ " 12.000000 | \n",
+ " 2010.000000 | \n",
+ " 755000.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
8 rows × 38 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass LotFrontage LotArea OverallQual \\\n",
+ "count 1460.000000 1460.000000 1201.000000 1460.000000 1460.000000 \n",
+ "mean 730.500000 56.897260 70.049958 10516.828082 6.099315 \n",
+ "std 421.610009 42.300571 24.284752 9981.264932 1.382997 \n",
+ "min 1.000000 20.000000 21.000000 1300.000000 1.000000 \n",
+ "25% 365.750000 20.000000 59.000000 7553.500000 5.000000 \n",
+ "50% 730.500000 50.000000 69.000000 9478.500000 6.000000 \n",
+ "75% 1095.250000 70.000000 80.000000 11601.500000 7.000000 \n",
+ "max 1460.000000 190.000000 313.000000 215245.000000 10.000000 \n",
+ "\n",
+ " OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 \\\n",
+ "count 1460.000000 1460.000000 1460.000000 1452.000000 1460.000000 \n",
+ "mean 5.575342 1971.267808 1984.865753 103.685262 443.639726 \n",
+ "std 1.112799 30.202904 20.645407 181.066207 456.098091 \n",
+ "min 1.000000 1872.000000 1950.000000 0.000000 0.000000 \n",
+ "25% 5.000000 1954.000000 1967.000000 0.000000 0.000000 \n",
+ "50% 5.000000 1973.000000 1994.000000 0.000000 383.500000 \n",
+ "75% 6.000000 2000.000000 2004.000000 166.000000 712.250000 \n",
+ "max 9.000000 2010.000000 2010.000000 1600.000000 5644.000000 \n",
+ "\n",
+ " ... WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch \\\n",
+ "count ... 1460.000000 1460.000000 1460.000000 1460.000000 \n",
+ "mean ... 94.244521 46.660274 21.954110 3.409589 \n",
+ "std ... 125.338794 66.256028 61.119149 29.317331 \n",
+ "min ... 0.000000 0.000000 0.000000 0.000000 \n",
+ "25% ... 0.000000 0.000000 0.000000 0.000000 \n",
+ "50% ... 0.000000 25.000000 0.000000 0.000000 \n",
+ "75% ... 168.000000 68.000000 0.000000 0.000000 \n",
+ "max ... 857.000000 547.000000 552.000000 508.000000 \n",
+ "\n",
+ " ScreenPorch PoolArea MiscVal MoSold YrSold \\\n",
+ "count 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 \n",
+ "mean 15.060959 2.758904 43.489041 6.321918 2007.815753 \n",
+ "std 55.757415 40.177307 496.123024 2.703626 1.328095 \n",
+ "min 0.000000 0.000000 0.000000 1.000000 2006.000000 \n",
+ "25% 0.000000 0.000000 0.000000 5.000000 2007.000000 \n",
+ "50% 0.000000 0.000000 0.000000 6.000000 2008.000000 \n",
+ "75% 0.000000 0.000000 0.000000 8.000000 2009.000000 \n",
+ "max 480.000000 738.000000 15500.000000 12.000000 2010.000000 \n",
+ "\n",
+ " SalePrice \n",
+ "count 1460.000000 \n",
+ "mean 180921.195890 \n",
+ "std 79442.502883 \n",
+ "min 34900.000000 \n",
+ "25% 129975.000000 \n",
+ "50% 163000.000000 \n",
+ "75% 214000.000000 \n",
+ "max 755000.000000 \n",
+ "\n",
+ "[8 rows x 38 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Dostęp do danych"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "fragment"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Index([u'Id', u'MSSubClass', u'MSZoning', u'LotFrontage', u'LotArea',\n",
+ " u'Street', u'Alley', u'LotShape', u'LandContour', u'Utilities',\n",
+ " u'LotConfig', u'LandSlope', u'Neighborhood', u'Condition1',\n",
+ " u'Condition2', u'BldgType', u'HouseStyle', u'OverallQual',\n",
+ " u'OverallCond', u'YearBuilt', u'YearRemodAdd', u'RoofStyle',\n",
+ " u'RoofMatl', u'Exterior1st', u'Exterior2nd', u'MasVnrType',\n",
+ " u'MasVnrArea', u'ExterQual', u'ExterCond', u'Foundation', u'BsmtQual',\n",
+ " u'BsmtCond', u'BsmtExposure', u'BsmtFinType1', u'BsmtFinSF1',\n",
+ " u'BsmtFinType2', u'BsmtFinSF2', u'BsmtUnfSF', u'TotalBsmtSF',\n",
+ " u'Heating', u'HeatingQC', u'CentralAir', u'Electrical', u'1stFlrSF',\n",
+ " u'2ndFlrSF', u'LowQualFinSF', u'GrLivArea', u'BsmtFullBath',\n",
+ " u'BsmtHalfBath', u'FullBath', u'HalfBath', u'BedroomAbvGr',\n",
+ " u'KitchenAbvGr', u'KitchenQual', u'TotRmsAbvGrd', u'Functional',\n",
+ " u'Fireplaces', u'FireplaceQu', u'GarageType', u'GarageYrBlt',\n",
+ " u'GarageFinish', u'GarageCars', u'GarageArea', u'GarageQual',\n",
+ " u'GarageCond', u'PavedDrive', u'WoodDeckSF', u'OpenPorchSF',\n",
+ " u'EnclosedPorch', u'3SsnPorch', u'ScreenPorch', u'PoolArea', u'PoolQC',\n",
+ " u'Fence', u'MiscFeature', u'MiscVal', u'MoSold', u'YrSold', u'SaleType',\n",
+ " u'SaleCondition', u'SalePrice'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(data.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0 60\n",
+ "1 20\n",
+ "2 60\n",
+ "3 70\n",
+ "4 60\n",
+ "Name: MSSubClass, dtype: int64\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(data['MSSubClass'].head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " MSSubClass SalePrice\n",
+ "0 60 208500\n",
+ "1 20 181500\n",
+ "2 60 223500\n",
+ "3 70 140000\n",
+ "4 60 250000\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(data[['MSSubClass', 'SalePrice']].head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "\n",
+ "[2 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[[0,3]]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 50 | \n",
+ " RL | \n",
+ " 85.0 | \n",
+ " 14115 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " MnPrv | \n",
+ " Shed | \n",
+ " 700 | \n",
+ " 10 | \n",
+ " 2009 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 143000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "5 6 50 RL 85.0 14115 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "5 Lvl AllPub ... 0 NaN MnPrv Shed 700 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "2 9 2008 WD Normal 223500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "4 12 2008 WD Normal 250000 \n",
+ "5 10 2009 WD Normal 143000 \n",
+ "\n",
+ "[6 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.loc[0:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "2 9 2008 WD Normal 223500 \n",
+ "3 2 2006 WD Abnorml 140000 \n",
+ "4 12 2008 WD Normal 250000 \n",
+ "\n",
+ "[5 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[data['MSZoning'] == 'RL'].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolArea | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " 8450 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 7 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 75.0 | \n",
+ " 10084 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 8 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 307000 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 10 | \n",
+ " 190 | \n",
+ " RL | \n",
+ " 50.0 | \n",
+ " 7420 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 118000 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 11 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 70.0 | \n",
+ " 11200 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 129500 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 81 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "0 1 60 RL 65.0 8450 Pave NaN Reg \n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "6 7 20 RL 75.0 10084 Pave NaN Reg \n",
+ "9 10 190 RL 50.0 7420 Pave NaN Reg \n",
+ "10 11 20 RL 70.0 11200 Pave NaN Reg \n",
+ "\n",
+ " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "6 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "9 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "10 Lvl AllPub ... 0 NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice \n",
+ "0 2 2008 WD Normal 208500 \n",
+ "1 5 2007 WD Normal 181500 \n",
+ "6 8 2007 WD Normal 307000 \n",
+ "9 1 2008 WD Normal 118000 \n",
+ "10 2 2008 WD Normal 129500 \n",
+ "\n",
+ "[5 rows x 81 columns]"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data[(data['MSZoning'] == 'RL') & (data['LotShape'] == 'Reg')].head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ceny = data['SalePrice']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "180921.19589041095"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ceny.mean()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "755000"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ceny.max()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'SalePrice'"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ceny.name"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "('Plus vat:', 0 256455.00\n",
+ "1 223245.00\n",
+ "2 274905.00\n",
+ "3 172200.00\n",
+ "4 307500.00\n",
+ "5 175890.00\n",
+ "6 377610.00\n",
+ "7 246000.00\n",
+ "8 159777.00\n",
+ "9 145140.00\n",
+ "10 159285.00\n",
+ "11 424350.00\n",
+ "12 177120.00\n",
+ "13 343785.00\n",
+ "14 193110.00\n",
+ "15 162360.00\n",
+ "16 183270.00\n",
+ "17 110700.00\n",
+ "18 195570.00\n",
+ "19 170970.00\n",
+ "20 400119.00\n",
+ "21 171462.00\n",
+ "22 282900.00\n",
+ "23 159777.00\n",
+ "24 189420.00\n",
+ "25 315249.00\n",
+ "26 165804.00\n",
+ "27 376380.00\n",
+ "28 255225.00\n",
+ "29 84255.00\n",
+ " ... \n",
+ "1430 236332.20\n",
+ "1431 176812.50\n",
+ "1432 79335.00\n",
+ "1433 229395.00\n",
+ "1434 196800.00\n",
+ "1435 214020.00\n",
+ "1436 148215.00\n",
+ "1437 485378.91\n",
+ "1438 184131.00\n",
+ "1439 242310.00\n",
+ "1440 234930.00\n",
+ "1441 183639.00\n",
+ "1442 381300.00\n",
+ "1443 148830.00\n",
+ "1444 220908.00\n",
+ "1445 158670.00\n",
+ "1446 194217.00\n",
+ "1447 295200.00\n",
+ "1448 137760.00\n",
+ "1449 113160.00\n",
+ "1450 167280.00\n",
+ "1451 353120.70\n",
+ "1452 178350.00\n",
+ "1453 103935.00\n",
+ "1454 227550.00\n",
+ "1455 215250.00\n",
+ "1456 258300.00\n",
+ "1457 327795.00\n",
+ "1458 174813.75\n",
+ "1459 181425.00\n",
+ "Name: SalePrice, Length: 1460, dtype: float64)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Plus vat:\", ceny * 1.23)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array(['RL', 'RM', 'C (all)', 'FV', 'RH'], dtype=object)"
+ ]
+ },
+ "execution_count": 35,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.MSZoning.unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "RL 1151\n",
+ "RM 218\n",
+ "FV 65\n",
+ "RH 16\n",
+ "C (all) 10\n",
+ "Name: MSZoning, dtype: int64"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.MSZoning.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data['nowa'] = ceny * 1.23"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " LotConfig | \n",
+ " LandSlope | \n",
+ " ... | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ " nowa | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 65.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 208500 | \n",
+ " 256455.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " FR2 | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ " 223245.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Inside | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ " 274905.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " Corner | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ " 172200.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " FR2 | \n",
+ " Gtl | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ " 307500.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 80 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " MSSubClass MSZoning LotFrontage Street Alley LotShape LandContour \\\n",
+ "0 60 RL 65.0 Pave NaN Reg Lvl \n",
+ "1 20 RL 80.0 Pave NaN Reg Lvl \n",
+ "2 60 RL 68.0 Pave NaN IR1 Lvl \n",
+ "3 70 RL 60.0 Pave NaN IR1 Lvl \n",
+ "4 60 RL 84.0 Pave NaN IR1 Lvl \n",
+ "\n",
+ " Utilities LotConfig LandSlope ... PoolQC Fence MiscFeature MiscVal \\\n",
+ "0 AllPub Inside Gtl ... NaN NaN NaN 0 \n",
+ "1 AllPub FR2 Gtl ... NaN NaN NaN 0 \n",
+ "2 AllPub Inside Gtl ... NaN NaN NaN 0 \n",
+ "3 AllPub Corner Gtl ... NaN NaN NaN 0 \n",
+ "4 AllPub FR2 Gtl ... NaN NaN NaN 0 \n",
+ "\n",
+ " MoSold YrSold SaleType SaleCondition SalePrice nowa \n",
+ "0 2 2008 WD Normal 208500 256455.0 \n",
+ "1 5 2007 WD Normal 181500 223245.0 \n",
+ "2 9 2008 WD Normal 223500 274905.0 \n",
+ "3 2 2006 WD Abnorml 140000 172200.0 \n",
+ "4 12 2008 WD Normal 250000 307500.0 \n",
+ "\n",
+ "[5 rows x 80 columns]"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.drop('LotArea', axis=1)\n",
+ "data.drop(['Id', 'LotArea'], axis=1).head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " MSSubClass | \n",
+ " MSZoning | \n",
+ " LotFrontage | \n",
+ " LotArea | \n",
+ " Street | \n",
+ " Alley | \n",
+ " LotShape | \n",
+ " LandContour | \n",
+ " Utilities | \n",
+ " ... | \n",
+ " PoolQC | \n",
+ " Fence | \n",
+ " MiscFeature | \n",
+ " MiscVal | \n",
+ " MoSold | \n",
+ " YrSold | \n",
+ " SaleType | \n",
+ " SaleCondition | \n",
+ " SalePrice | \n",
+ " nowa | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 20 | \n",
+ " RL | \n",
+ " 80.0 | \n",
+ " 9600 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " Reg | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 5 | \n",
+ " 2007 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 181500 | \n",
+ " 223245.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 68.0 | \n",
+ " 11250 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 9 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 223500 | \n",
+ " 274905.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 70 | \n",
+ " RL | \n",
+ " 60.0 | \n",
+ " 9550 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 2006 | \n",
+ " WD | \n",
+ " Abnorml | \n",
+ " 140000 | \n",
+ " 172200.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 60 | \n",
+ " RL | \n",
+ " 84.0 | \n",
+ " 14260 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " 12 | \n",
+ " 2008 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 250000 | \n",
+ " 307500.0 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 50 | \n",
+ " RL | \n",
+ " 85.0 | \n",
+ " 14115 | \n",
+ " Pave | \n",
+ " NaN | \n",
+ " IR1 | \n",
+ " Lvl | \n",
+ " AllPub | \n",
+ " ... | \n",
+ " NaN | \n",
+ " MnPrv | \n",
+ " Shed | \n",
+ " 700 | \n",
+ " 10 | \n",
+ " 2009 | \n",
+ " WD | \n",
+ " Normal | \n",
+ " 143000 | \n",
+ " 175890.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 82 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
+ "1 2 20 RL 80.0 9600 Pave NaN Reg \n",
+ "2 3 60 RL 68.0 11250 Pave NaN IR1 \n",
+ "3 4 70 RL 60.0 9550 Pave NaN IR1 \n",
+ "4 5 60 RL 84.0 14260 Pave NaN IR1 \n",
+ "5 6 50 RL 85.0 14115 Pave NaN IR1 \n",
+ "\n",
+ " LandContour Utilities ... PoolQC Fence MiscFeature MiscVal MoSold \\\n",
+ "1 Lvl AllPub ... NaN NaN NaN 0 5 \n",
+ "2 Lvl AllPub ... NaN NaN NaN 0 9 \n",
+ "3 Lvl AllPub ... NaN NaN NaN 0 2 \n",
+ "4 Lvl AllPub ... NaN NaN NaN 0 12 \n",
+ "5 Lvl AllPub ... NaN MnPrv Shed 700 10 \n",
+ "\n",
+ " YrSold SaleType SaleCondition SalePrice nowa \n",
+ "1 2007 WD Normal 181500 223245.0 \n",
+ "2 2008 WD Normal 223500 274905.0 \n",
+ "3 2006 WD Abnorml 140000 172200.0 \n",
+ "4 2008 WD Normal 250000 307500.0 \n",
+ "5 2009 WD Normal 143000 175890.0 \n",
+ "\n",
+ "[5 rows x 82 columns]"
+ ]
+ },
+ "execution_count": 44,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.drop(0).head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "celltoolbar": "Slideshow",
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/labs06/README.md b/labs06/README.md
new file mode 100644
index 0000000..e35b68f
--- /dev/null
+++ b/labs06/README.md
@@ -0,0 +1,18 @@
+## Zadania
+
+**zad. 0**
+Sprawdź, czy masz zainstalowany pakiet ``pandas``. Jeżeli nie, zainstaluj go.
+
+**zad. 2 (domowe)**
+Jest to zadanie złożone, składające się z kilku części. Całość będzie opierać się na danych zawartych w pliku *mieszkania.csv*, które dotyczą cen mieszkań w Poznaniu sprzed kilku lat.
+ 1. Otwórz plik ``task02.py``, który zawiera szkielet kodu, który będziemy rozwijać w tym zadaniu.
+ 1. Napisz funkcję, która wczyta zestaw danych z pliku *mieszkania.csv* i zwróci obiekt typu *DataFrame*. Jeżeli wszystko zostało zrobione poprawnie, powinno się wyświetlić 5 pierwszych wierszy.
+ 1. Uzupełnij funkcję ``most_common_room_number``, która zwróci, jaka jest najpopularniejsza liczba pokoi w ogłoszeniach. Funkcja powinna zwrócić liczbę całkowitą.
+ 1. Uzupełnij kod w funkcji ``cheapest_flats(dane, n)``, która zwróci *n* najtańszych ofert mieszkań. Zwrócony obiekt powinien być typu ``DataFrame``.
+ 1. Napisz funkcję ``find_borough(desc)``, która przyjmuje 1 argument typu *string* i zwróci jedną z dzielnic zdefiniowanych w liście ``dzielnice``. Funkcja ma zwrócić pierwszą (względem kolejności) nazwę dzielnicy, która jest zawarta w ``desc``. Jeżeli żadna nazwa nie została odnaleziona, zwróć *Inne*.
+ 1. Dodaj kolumnę ``Borough``, która będzie zawierać informacje o dzielnicach i powstanie z kolumny ``Localization``. Wykorzystaj do tego funkcję ``find_borough``.
+ 1. Uzupełnij funkcję ``write_plot``, która zapisze do pliku ``filename`` wykres słupkowy przedstawiający liczbę ogłoszeń mieszkań z podziałem na dzielnice.
+ 1. Napisz funkcję ``mean_price``, która zwróci średnią cenę mieszkania ``room_numer``-pokojowego.
+ 1. Uzupełnij funkcję ``find_13``, która zwróci listę dzielnic, które zawierają ofertę mieszkania na 13. piętrze.
+ 1. Napisz funkcję ``find_best_flats``, która zwróci wszystkie ogłoszenia mieszkań, które znajdują się na Winogradach, mają 3 pokoje i są położone na 1. piętrze.
+ 1. *(dodatkowe)*: Korzystając z pakietu *sklearn* zbuduj model regresji liniowej, który będzie wyznaczać cenę mieszkania na podstawie wielkości mieszkania i liczby pokoi.
diff --git a/labs06/tasks.py b/labs06/tasks.py
new file mode 100755
index 0000000..0d38505
--- /dev/null
+++ b/labs06/tasks.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+1. Zaimportuj bibliotekę pandas jako pd.
+"""
+
+
+"""
+2. Wczytaj zbiór danych `bikes.csv` do zmiennej data.
+"""
+
+
+"""
+3. Wyświetl 5 pierwszych wierszy z data.
+"""
+
+
+"""
+4. Wyświetl nazwy kolumn.
+"""
+
+
+"""
+5. Wyświetl ile nasz zbiór danych ma kolumn i wierszy.
+"""
+
+
+"""
+6. Wyświetl kolumnę 'City' z powyższego zbioru danych.
+"""
+
+
+"""
+7. Wyświetl jakie wartości przyjmuje kolumna 'City'.
+"""
+
+"""
+8. Wyświetl tabelę przestawną kolumny City.
+"""
+
+
+"""
+9. Wyświetl tylko pierwsze 4 wiersze z wcześniejszego polecenia.
+"""
+
+
+"""
+10. Wyświetl, w ilu przypadkach kolumna City zawiera NaN.
+"""
+
+
+
+"""
+11. Wyświetl data.info()
+"""
+
+"""
+12. Wyświetl tylko kolumny Borough i Agency i tylko 5 ostatnich linii.
+"""
+
+
+"""
+13. Wyświetl tylko te dane, dla których wartość z kolumny Agency jest równa
+NYPD. Zlicz ile jest takich przykładów.
+"""
+
+"""
+14. Wyświetl wartość minimalną i maksymalną z kolumny Longitude.
+"""
+
+"""
+15. Dodaj kolumnę diff, która powstanie przez sumowanie kolumn Longitude i Latitude.
+"""
+
+
+"""
+16. Wyświetl tabelę przestawną dla kolumny 'Descriptor', dla której Agency jest
+równe NYPD.
+"""
diff --git a/labs07/gapminder.csv b/labs07/gapminder.csv
new file mode 100644
index 0000000..534a004
--- /dev/null
+++ b/labs07/gapminder.csv
@@ -0,0 +1,177 @@
+,female_BMI,male_BMI,gdp,population,under5mortality,life_expectancy,fertility
+Afghanistan,21.07402,20.62058,1311.0,26528741.0,110.4,52.8,6.2
+Albania,25.65726,26.44657,8644.0,2968026.0,17.9,76.8,1.76
+Algeria,26.368409999999997,24.5962,12314.0,34811059.0,29.5,75.5,2.73
+Angola,23.48431,22.25083,7103.0,19842251.0,192.0,56.7,6.43
+Antigua and Barbuda,27.50545,25.76602,25736.0,85350.0,10.9,75.5,2.16
+Argentina,27.46523,27.5017,14646.0,40381860.0,15.4,75.4,2.24
+Armenia,27.1342,25.355420000000002,7383.0,2975029.0,20.0,72.3,1.4
+Australia,26.87777,27.56373,41312.0,21370348.0,5.2,81.6,1.96
+Austria,25.09414,26.467409999999997,43952.0,8331465.0,4.6,80.4,1.41
+Azerbaijan,27.50879,25.65117,14365.0,8868713.0,43.3,69.2,1.99
+Bahamas,29.13948,27.24594,24373.0,348587.0,14.5,72.2,1.89
+Bahrain,28.790940000000003,27.83721,42507.0,1115777.0,9.4,77.6,2.23
+Bangladesh,20.54531,20.39742,2265.0,148252473.0,55.9,68.3,2.38
+Barbados,29.221690000000002,26.384390000000003,16075.0,277315.0,15.4,75.3,1.83
+Belarus,26.641859999999998,26.16443,14488.0,9526453.0,7.2,70.0,1.42
+Belgium,25.1446,26.75915,41641.0,10779155.0,4.7,79.6,1.82
+Belize,29.81663,27.02255,8293.0,306165.0,20.1,70.7,2.91
+Benin,23.74026,22.41835,1646.0,8973525.0,116.3,59.7,5.27
+Bhutan,22.88243,22.8218,5663.0,694990.0,48.1,70.7,2.51
+Bolivia,26.8633,24.43335,5066.0,9599916.0,52.0,71.2,3.48
+Bosnia and Herzegovina,26.35874,26.611629999999998,9316.0,3839749.0,8.1,77.5,1.22
+Botswana,26.09156,22.129839999999998,13858.0,1967866.0,63.8,53.2,2.86
+Brazil,25.99113,25.78623,13906.0,194769696.0,18.6,73.2,1.9
+Brunei,22.892310000000002,24.18179,72351.0,380786.0,9.0,76.9,2.1
+Bulgaria,25.51574,26.542859999999997,15368.0,7513646.0,13.7,73.2,1.43
+Burkina Faso,21.63031,21.27157,1358.0,14709011.0,130.4,58.0,6.04
+Burundi,21.27927,21.50291,723.0,8821795.0,108.6,59.1,6.48
+Cambodia,21.69608,20.80496,2442.0,13933660.0,51.5,66.1,3.05
+Cameroon,24.9527,23.681729999999998,2571.0,19570418.0,113.8,56.6,5.17
+Canada,26.698290000000004,27.4521,41468.0,33363256.0,5.8,80.8,1.68
+Cape Verde,24.96136,23.515220000000003,6031.0,483824.0,28.4,70.4,2.57
+Chad,21.95424,21.485689999999998,1753.0,11139740.0,168.0,54.3,6.81
+Chile,27.92807,27.015420000000002,18698.0,16645940.0,8.9,78.5,1.89
+China,22.91041,22.92176,7880.0,1326690636.0,18.5,73.4,1.53
+Colombia,26.22529,24.94041,10489.0,44901660.0,19.7,76.2,2.43
+Comoros,22.444329999999997,22.06131,1440.0,665414.0,91.2,67.1,5.05
+"Congo, Dem. Rep.",21.6677,19.86692,607.0,61809278.0,124.5,57.5,6.45
+"Congo, Rep.",23.10824,21.87134,5022.0,3832771.0,72.6,58.8,5.1
+Costa Rica,27.03497,26.47897,12219.0,4429506.0,10.3,79.8,1.91
+Cote d'Ivoire,23.82088,22.56469,2854.0,19261647.0,116.9,55.4,4.91
+Croatia,25.17882,26.596290000000003,21873.0,4344151.0,5.9,76.2,1.43
+Cuba,26.576140000000002,25.06867,17765.0,11290239.0,6.3,77.6,1.5
+Cyprus,25.92587,27.41899,35828.0,1077010.0,4.2,80.0,1.49
+Denmark,25.106270000000002,26.13287,45017.0,5495302.0,4.3,78.9,1.89
+Djibouti,24.38177,23.38403,2502.0,809639.0,81.0,61.8,3.76
+Ecuador,27.062690000000003,25.58841,9244.0,14447600.0,26.8,74.7,2.73
+Egypt,30.099970000000003,26.732429999999997,9974.0,78976122.0,31.4,70.2,2.95
+El Salvador,27.84092,26.36751,7450.0,6004199.0,21.6,73.7,2.32
+Equatorial Guinea,24.528370000000002,23.7664,40143.0,686223.0,118.4,57.5,5.31
+Eritrea,21.082320000000003,20.885089999999998,1088.0,4500638.0,60.4,60.1,5.16
+Estonia,25.185979999999997,26.264459999999996,24743.0,1339941.0,5.5,74.2,1.62
+Ethiopia,20.71463,20.247,931.0,83079608.0,86.9,60.0,5.19
+Fiji,29.339409999999997,26.53078,7129.0,843206.0,24.0,64.9,2.74
+Finland,25.58418,26.733390000000004,42122.0,5314170.0,3.3,79.6,1.85
+France,24.82949,25.853289999999998,37505.0,62309529.0,4.3,81.1,1.97
+Gabon,25.95121,24.0762,15800.0,1473741.0,68.0,61.7,4.28
+Gambia,24.82101,21.65029,1566.0,1586749.0,87.4,65.7,5.8
+Georgia,26.45014,25.54942,5900.0,4343290.0,19.3,71.8,1.79
+Germany,25.73903,27.165090000000003,41199.0,80665906.0,4.4,80.0,1.37
+Ghana,24.33014,22.842470000000002,2907.0,23115919.0,79.9,62.0,4.19
+Greece,24.92026,26.33786,32197.0,11161755.0,4.9,80.2,1.46
+Grenada,27.31948,25.179879999999997,12116.0,103934.0,13.5,70.8,2.28
+Guatemala,26.84324,25.29947,6960.0,14106687.0,36.9,71.2,4.12
+Guinea,22.45206,22.52449,1230.0,10427356.0,121.0,57.1,5.34
+Guinea-Bissau,22.92809,21.64338,1326.0,1561293.0,127.6,53.6,5.25
+Guyana,26.470190000000002,23.68465,5208.0,748096.0,41.9,65.0,2.74
+Haiti,23.27785,23.66302,1600.0,9705130.0,83.3,61.0,3.5
+Honduras,26.73191,25.10872,4391.0,7259470.0,26.5,71.8,3.27
+"Hong Kong, China",23.71046,25.057470000000002,46635.0,6910384.0,3.06,82.49,1.04
+Hungary,25.97839,27.115679999999998,23334.0,10050699.0,7.2,73.9,1.33
+Iceland,26.02599,27.206870000000002,42294.0,310033.0,2.7,82.4,2.12
+India,21.31478,20.95956,3901.0,1197070109.0,65.6,64.7,2.64
+Indonesia,22.986929999999997,21.85576,7856.0,235360765.0,36.2,69.4,2.48
+Iran,27.236079999999998,25.310029999999998,15955.0,72530693.0,21.4,73.1,1.88
+Iraq,28.411170000000002,26.71017,11616.0,29163327.0,38.3,66.6,4.34
+Ireland,26.62176,27.65325,47713.0,4480145.0,4.5,80.1,2.0
+Israel,27.301920000000003,27.13151,28562.0,7093808.0,4.9,80.6,2.92
+Italy,24.79289,26.4802,37475.0,59319234.0,4.1,81.5,1.39
+Jamaica,27.22601,24.00421,8951.0,2717344.0,18.9,75.1,2.39
+Japan,21.87088,23.50004,34800.0,127317900.0,3.4,82.5,1.34
+Jordan,29.218009999999996,27.47362,10897.0,6010035.0,22.1,76.9,3.59
+Kazakhstan,26.65065,26.290779999999998,18797.0,15915966.0,25.9,67.1,2.51
+Kenya,23.06181,21.592579999999998,2358.0,38244442.0,71.0,60.8,4.76
+Kiribati,31.30769,29.2384,1803.0,98437.0,64.5,61.5,3.13
+Kuwait,31.161859999999997,29.172109999999996,91966.0,2705290.0,11.3,77.3,2.68
+Latvia,25.615129999999997,26.45693,20977.0,2144215.0,10.5,72.4,1.5
+Lebanon,27.70471,27.20117,14158.0,4109389.0,11.3,77.8,1.57
+Lesotho,26.780520000000003,21.90157,2041.0,1972194.0,114.2,44.5,3.34
+Liberia,23.21679,21.89537,588.0,3672782.0,100.9,59.9,5.19
+Libya,29.19874,26.54164,29853.0,6123022.0,18.8,75.6,2.64
+Lithuania,26.01424,26.86102,23223.0,3219802.0,8.2,72.1,1.42
+Luxembourg,26.09326,27.434040000000003,95001.0,485079.0,2.8,81.0,1.63
+"Macao, China",24.895039999999998,25.713820000000002,80191.0,507274.0,6.72,79.32,0.94
+"Macedonia, FYR",25.37646,26.34473,10872.0,2055266.0,11.8,74.5,1.47
+Madagascar,20.73501,21.403470000000002,1528.0,19926798.0,66.7,62.2,4.79
+Malawi,22.91455,22.034679999999998,674.0,13904671.0,101.1,52.4,5.78
+Malaysia,25.448320000000002,24.73069,19968.0,27197419.0,8.0,74.5,2.05
+Maldives,26.4132,23.219910000000002,12029.0,321026.0,16.0,78.5,2.38
+Mali,23.07655,21.78881,1602.0,14223403.0,148.3,58.5,6.82
+Malta,27.04993,27.683609999999998,27872.0,406392.0,6.6,80.7,1.38
+Mauritania,26.26476,22.62295,3356.0,3414552.0,103.0,67.9,4.94
+Mauritius,26.09824,25.15669,14615.0,1238013.0,15.8,72.9,1.58
+Mexico,28.737509999999997,27.42468,15826.0,114972821.0,17.9,75.4,2.35
+"Micronesia, Fed. Sts.",31.28402,28.10315,3197.0,104472.0,43.1,68.0,3.59
+Moldova,27.05617,24.2369,3890.0,4111168.0,17.6,70.4,1.49
+Mongolia,25.71375,24.88385,7563.0,2629666.0,34.8,64.8,2.37
+Montenegro,25.70186,26.55412,14183.0,619740.0,8.1,76.0,1.72
+Morocco,26.223090000000003,25.63182,6091.0,31350544.0,35.8,73.3,2.44
+Mozambique,23.317339999999998,21.93536,864.0,22994867.0,114.4,54.0,5.54
+Myanmar,22.47733,21.44932,2891.0,51030006.0,87.2,59.4,2.05
+Namibia,25.14988,22.65008,8169.0,2115703.0,62.2,59.1,3.36
+Nepal,20.72814,20.76344,1866.0,26325183.0,50.7,68.4,2.9
+Netherlands,25.47269,26.01541,47388.0,16519862.0,4.8,80.3,1.77
+New Zealand,27.36642,27.768929999999997,32122.0,4285380.0,6.4,80.3,2.12
+Nicaragua,27.57259,25.77291,4060.0,5594524.0,28.1,77.0,2.72
+Niger,21.95958,21.21958,843.0,15085130.0,141.3,58.0,7.59
+Nigeria,23.674020000000002,23.03322,4684.0,151115683.0,140.9,59.2,6.02
+Norway,25.73772,26.934240000000003,65216.0,4771633.0,3.6,80.8,1.96
+Oman,26.66535,26.241090000000003,47799.0,2652281.0,11.9,76.2,2.89
+Pakistan,23.44986,22.299139999999998,4187.0,163096985.0,95.5,64.1,3.58
+Panama,27.67758,26.26959,14033.0,3498679.0,21.0,77.3,2.61
+Papua New Guinea,25.77189,25.015060000000002,1982.0,6540267.0,69.7,58.6,4.07
+Paraguay,25.90523,25.54223,6684.0,6047131.0,25.7,74.0,3.06
+Peru,25.98511,24.770410000000002,9249.0,28642048.0,23.2,78.2,2.58
+Philippines,23.4671,22.872629999999997,5332.0,90297115.0,33.4,69.8,3.26
+Poland,25.918870000000002,26.6738,19996.0,38525752.0,6.7,75.4,1.33
+Portugal,26.183020000000003,26.68445,27747.0,10577458.0,4.1,79.4,1.36
+Puerto Rico,30.2212,28.378040000000002,35855.0,3728126.0,8.78,77.0,1.69
+Qatar,28.912509999999997,28.13138,126076.0,1388962.0,9.5,77.9,2.2
+Romania,25.22425,25.41069,18032.0,20741669.0,16.1,73.2,1.34
+Russia,27.21272,26.01131,22506.0,143123163.0,13.5,67.9,1.49
+Rwanda,22.07156,22.55453,1173.0,9750314.0,78.3,64.1,5.06
+Samoa,33.659079999999996,30.42475,5731.0,183440.0,18.8,72.3,4.43
+Sao Tome and Principe,24.88216,23.51233,2673.0,163595.0,61.0,66.0,4.41
+Saudi Arabia,29.598779999999998,27.884320000000002,44189.0,26742842.0,18.1,78.3,2.97
+Senegal,24.30968,21.927429999999998,2162.0,12229703.0,75.8,63.5,5.11
+Serbia,25.669970000000003,26.51495,12522.0,9109535.0,8.0,74.3,1.41
+Seychelles,27.973740000000003,25.56236,20065.0,91634.0,14.2,72.9,2.28
+Sierra Leone,23.93364,22.53139,1289.0,5521838.0,179.1,53.6,5.13
+Singapore,22.86642,23.83996,65991.0,4849641.0,2.8,80.6,1.28
+Slovak Republic,26.323729999999998,26.92717,24670.0,5396710.0,8.8,74.9,1.31
+Slovenia,26.582140000000003,27.43983,30816.0,2030599.0,3.7,78.7,1.43
+Solomon Islands,28.8762,27.159879999999998,1835.0,503410.0,33.1,62.3,4.36
+Somalia,22.66607,21.969170000000002,615.0,9132589.0,168.5,52.6,7.06
+South Africa,29.4803,26.85538,12263.0,50348811.0,66.1,53.4,2.54
+Spain,26.30554,27.49975,34676.0,45817016.0,5.0,81.1,1.42
+Sri Lanka,23.11717,21.96671,6907.0,19949553.0,11.7,74.0,2.32
+Sudan,23.16132,22.40484,3246.0,34470138.0,84.7,65.5,4.79
+Suriname,27.749859999999998,25.49887,13470.0,506657.0,26.4,70.2,2.41
+Swaziland,28.448859999999996,23.16969,5887.0,1153750.0,112.2,45.1,3.7
+Sweden,25.1466,26.37629,43421.0,9226333.0,3.2,81.1,1.92
+Switzerland,24.07242,26.20195,55020.0,7646542.0,4.7,82.0,1.47
+Syria,28.87418,26.919690000000003,6246.0,20097057.0,16.5,76.1,3.17
+Tajikistan,23.84799,23.77966,2001.0,7254072.0,56.2,69.6,3.7
+Tanzania,23.0843,22.47792,2030.0,42844744.0,72.4,60.4,5.54
+Thailand,24.38577,23.008029999999998,12216.0,66453255.0,15.6,73.9,1.48
+Timor-Leste,21.50694,20.59082,1486.0,1030915.0,70.2,69.9,6.48
+Togo,22.73858,21.87875,1219.0,6052937.0,96.4,57.5,4.88
+Tonga,34.25969,30.99563,4748.0,102816.0,17.0,70.3,4.01
+Trinidad and Tobago,28.27587,26.396690000000003,30875.0,1315372.0,24.9,71.7,1.8
+Tunisia,27.93706,25.15699,9938.0,10408091.0,19.4,76.8,2.04
+Turkey,28.247490000000003,26.703709999999997,16454.0,70344357.0,22.2,77.8,2.15
+Turkmenistan,24.66154,25.24796,8877.0,4917541.0,63.9,67.2,2.48
+Uganda,22.48126,22.35833,1437.0,31014427.0,89.3,56.0,6.34
+Ukraine,26.23317,25.42379,8762.0,46028476.0,12.9,67.8,1.38
+United Arab Emirates,29.614009999999997,28.053590000000003,73029.0,6900142.0,9.1,75.6,1.95
+United Kingdom,26.944490000000002,27.392490000000002,37739.0,61689620.0,5.6,79.7,1.87
+United States,28.343590000000003,28.456979999999998,50384.0,304473143.0,7.7,78.3,2.07
+Uruguay,26.593040000000002,26.39123,15317.0,3350832.0,13.0,76.0,2.11
+Uzbekistan,25.43432,25.32054,3733.0,26952719.0,49.2,69.6,2.46
+Vanuatu,28.458759999999998,26.78926,2944.0,225335.0,28.2,63.4,3.61
+Venezuela,28.134079999999997,27.445,17911.0,28116716.0,17.1,74.2,2.53
+Vietnam,21.065,20.9163,4085.0,86589342.0,26.2,74.1,1.86
+West Bank and Gaza,29.026429999999998,26.5775,3564.0,3854667.0,24.7,74.1,4.38
+Zambia,23.05436,20.68321,3039.0,13114579.0,94.9,51.1,5.88
+Zimbabwe,24.645220000000002,22.0266,1286.0,13495462.0,98.3,47.3,3.85
diff --git a/labs07/sklearn.ipynb b/labs07/sklearn.ipynb
new file mode 100644
index 0000000..c21dcbb
--- /dev/null
+++ b/labs07/sklearn.ipynb
@@ -0,0 +1,485 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "# Analiza danych w Pythonie: sklearn\n",
+ "\n",
+ "### Tomasz Dwojak\n",
+ "\n",
+ "### 3 czerwca 2018"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ " * Pierwsza część: pandas\n",
+ " * Druga część: sklearn"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "### Przypomnienie z UMZ\n",
+ " * przygotowanie i czyszczenie danych\n",
+ " * wybór i trening modelu\n",
+ " * tuning\n",
+ " * ewaluacja"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import sklearn\n",
+ "import pandas as pd\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "data = pd.read_csv(\"./gapminder.csv\", index_col=0)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " female_BMI | \n",
+ " male_BMI | \n",
+ " gdp | \n",
+ " population | \n",
+ " under5mortality | \n",
+ " life_expectancy | \n",
+ " fertility | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Afghanistan | \n",
+ " 21.07402 | \n",
+ " 20.62058 | \n",
+ " 1311.0 | \n",
+ " 26528741.0 | \n",
+ " 110.4 | \n",
+ " 52.8 | \n",
+ " 6.20 | \n",
+ "
\n",
+ " \n",
+ " Albania | \n",
+ " 25.65726 | \n",
+ " 26.44657 | \n",
+ " 8644.0 | \n",
+ " 2968026.0 | \n",
+ " 17.9 | \n",
+ " 76.8 | \n",
+ " 1.76 | \n",
+ "
\n",
+ " \n",
+ " Algeria | \n",
+ " 26.36841 | \n",
+ " 24.59620 | \n",
+ " 12314.0 | \n",
+ " 34811059.0 | \n",
+ " 29.5 | \n",
+ " 75.5 | \n",
+ " 2.73 | \n",
+ "
\n",
+ " \n",
+ " Angola | \n",
+ " 23.48431 | \n",
+ " 22.25083 | \n",
+ " 7103.0 | \n",
+ " 19842251.0 | \n",
+ " 192.0 | \n",
+ " 56.7 | \n",
+ " 6.43 | \n",
+ "
\n",
+ " \n",
+ " Antigua and Barbuda | \n",
+ " 27.50545 | \n",
+ " 25.76602 | \n",
+ " 25736.0 | \n",
+ " 85350.0 | \n",
+ " 10.9 | \n",
+ " 75.5 | \n",
+ " 2.16 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " female_BMI male_BMI gdp population \\\n",
+ "Afghanistan 21.07402 20.62058 1311.0 26528741.0 \n",
+ "Albania 25.65726 26.44657 8644.0 2968026.0 \n",
+ "Algeria 26.36841 24.59620 12314.0 34811059.0 \n",
+ "Angola 23.48431 22.25083 7103.0 19842251.0 \n",
+ "Antigua and Barbuda 27.50545 25.76602 25736.0 85350.0 \n",
+ "\n",
+ " under5mortality life_expectancy fertility \n",
+ "Afghanistan 110.4 52.8 6.20 \n",
+ "Albania 17.9 76.8 1.76 \n",
+ "Algeria 29.5 75.5 2.73 \n",
+ "Angola 192.0 56.7 6.43 \n",
+ "Antigua and Barbuda 10.9 75.5 2.16 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "data.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "y = data['life_expectancy']\n",
+ "X = data.drop('life_expectancy', axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "train_X, test_X, train_y, test_y = \\\n",
+ " train_test_split(X, y, test_size=0.2, random_state=123, shuffle=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LinearRegression\n",
+ "model = LinearRegression()\n",
+ "model.fit(X,y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([67.56279809, 76.25840076, 50.21126326, 59.21303855, 72.06348723])"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "predicted = model.predict(test_X)\n",
+ "predicted[:5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "RMSE: 3.5179543848147863\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import mean_squared_error\n",
+ "rmse = np.sqrt(mean_squared_error(predicted, test_y))\n",
+ "print(\"RMSE:\", rmse)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.795295000468209"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ " r2 = model.score(test_X, test_y)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "source": [
+ "#### API\n",
+ " * model\n",
+ " * `fit`\n",
+ " * `predict`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "female_BMI: -1.18\n",
+ "male_BMI: 1.46\n",
+ "gdp: 5.11e-05\n",
+ "population: 7.21e-10\n",
+ "under5mortality: -0.159\n",
+ "fertility: 0.421\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Pair each feature column with its fitted coefficient and print it to 3 significant digits.\n",
+ "for name, coef in zip(train_X.columns, model.coef_):\n",
+ "    print(\"{}: {:.3}\".format(name, coef))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/lib/python3.6/site-packages/ipykernel_launcher.py:2: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n",
+ " \n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model2 = LinearRegression()\n",
+ "# Series.reshape is deprecated in pandas; reshape the underlying array into a single 2-D column instead.\n",
+ "model2.fit(train_X['male_BMI'].values.reshape(-1, 1), train_y)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.5852413468462743"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "model2.intercept_"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {
+ "slideshow": {
+ "slide_type": "slide"
+ }
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/usr/lib/python3.6/site-packages/ipykernel_launcher.py:5: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n",
+ " \"\"\"\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from matplotlib import pyplot as plt\n",
+ "%matplotlib inline\n",
+ "\n",
+ "male_bmi = train_X['male_BMI']\n",
+ "# Training points in green, the model2 fit drawn as a black line.\n",
+ "plt.scatter(male_bmi, train_y, color='g')\n",
+ "# predict() needs a 2-D input; go through .values because Series.reshape is deprecated.\n",
+ "plt.plot(male_bmi, model2.predict(male_bmi.values.reshape(-1, 1)), color='k')\n",
+ "\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "celltoolbar": "Slideshow",
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}