diff --git a/labs05/data/iowa.csv.gz b/labs05/data/iowa.csv.gz new file mode 100644 index 0000000..935b981 Binary files /dev/null and b/labs05/data/iowa.csv.gz differ diff --git a/labs05/pandas_wprowadzenie.ipynb b/labs05/pandas_wprowadzenie.ipynb new file mode 100644 index 0000000..a92bc13 --- /dev/null +++ b/labs05/pandas_wprowadzenie.ipynb @@ -0,0 +1,2309 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Analiza danych w Pythonie\n", + "\n", + "### Tomasz Dwojak\n", + "\n", + "### 3 czerwca 2018" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Analiza danych:\n", + "\n", + " * R\n", + " * Python" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Ekosystem Pythona\n", + "\n", + " * pandas: ramka danych\n", + " * sklearn: modele ML\n", + " * numpy: obliczenia\n", + " * matplotlib: wykresy" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Typy danych\n", + "\n", + " * Szereg (`pd.Series`)\n", + " * Ramka danych (`pd.DataFrame`)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Wczytanie danych" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [], + "source": [ + "data = pd.read_csv(\"./data/iowa.csv.gz\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
2360RL68.011250PaveNaNIR1LvlAllPub...0NaNNaNNaN092008WDNormal223500
3470RL60.09550PaveNaNIR1LvlAllPub...0NaNNaNNaN022006WDAbnorml140000
4560RL84.014260PaveNaNIR1LvlAllPub...0NaNNaNNaN0122008WDNormal250000
\n", + "

5 rows × 81 columns

\n", + "
" + ], + "text/plain": [ + " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", + "0 1 60 RL 65.0 8450 Pave NaN Reg \n", + "1 2 20 RL 80.0 9600 Pave NaN Reg \n", + "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", + "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", + "4 5 60 RL 84.0 14260 Pave NaN IR1 \n", + "\n", + " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n", + "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "\n", + " MoSold YrSold SaleType SaleCondition SalePrice \n", + "0 2 2008 WD Normal 208500 \n", + "1 5 2007 WD Normal 181500 \n", + "2 9 2008 WD Normal 223500 \n", + "3 2 2006 WD Abnorml 140000 \n", + "4 12 2008 WD Normal 250000 \n", + "\n", + "[5 rows x 81 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1460, 81)\n" + ] + } + ], + "source": [ + "shape = data.shape\n", + "rows = shape[0]\n", + "cols = shape[1]\n", + "\n", + "print(rows, cols)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 1460 entries, 0 to 1459\n", + "Data columns (total 81 columns):\n", + "Id 1460 non-null int64\n", + "MSSubClass 1460 non-null int64\n", + "MSZoning 1460 non-null object\n", + "LotFrontage 1201 non-null float64\n", + "LotArea 1460 non-null int64\n", + "Street 1460 non-null object\n", + "Alley 91 non-null object\n", + "LotShape 1460 non-null object\n", + "LandContour 1460 non-null object\n", + "Utilities 1460 non-null 
object\n", + "LotConfig 1460 non-null object\n", + "LandSlope 1460 non-null object\n", + "Neighborhood 1460 non-null object\n", + "Condition1 1460 non-null object\n", + "Condition2 1460 non-null object\n", + "BldgType 1460 non-null object\n", + "HouseStyle 1460 non-null object\n", + "OverallQual 1460 non-null int64\n", + "OverallCond 1460 non-null int64\n", + "YearBuilt 1460 non-null int64\n", + "YearRemodAdd 1460 non-null int64\n", + "RoofStyle 1460 non-null object\n", + "RoofMatl 1460 non-null object\n", + "Exterior1st 1460 non-null object\n", + "Exterior2nd 1460 non-null object\n", + "MasVnrType 1452 non-null object\n", + "MasVnrArea 1452 non-null float64\n", + "ExterQual 1460 non-null object\n", + "ExterCond 1460 non-null object\n", + "Foundation 1460 non-null object\n", + "BsmtQual 1423 non-null object\n", + "BsmtCond 1423 non-null object\n", + "BsmtExposure 1422 non-null object\n", + "BsmtFinType1 1423 non-null object\n", + "BsmtFinSF1 1460 non-null int64\n", + "BsmtFinType2 1422 non-null object\n", + "BsmtFinSF2 1460 non-null int64\n", + "BsmtUnfSF 1460 non-null int64\n", + "TotalBsmtSF 1460 non-null int64\n", + "Heating 1460 non-null object\n", + "HeatingQC 1460 non-null object\n", + "CentralAir 1460 non-null object\n", + "Electrical 1459 non-null object\n", + "1stFlrSF 1460 non-null int64\n", + "2ndFlrSF 1460 non-null int64\n", + "LowQualFinSF 1460 non-null int64\n", + "GrLivArea 1460 non-null int64\n", + "BsmtFullBath 1460 non-null int64\n", + "BsmtHalfBath 1460 non-null int64\n", + "FullBath 1460 non-null int64\n", + "HalfBath 1460 non-null int64\n", + "BedroomAbvGr 1460 non-null int64\n", + "KitchenAbvGr 1460 non-null int64\n", + "KitchenQual 1460 non-null object\n", + "TotRmsAbvGrd 1460 non-null int64\n", + "Functional 1460 non-null object\n", + "Fireplaces 1460 non-null int64\n", + "FireplaceQu 770 non-null object\n", + "GarageType 1379 non-null object\n", + "GarageYrBlt 1379 non-null float64\n", + "GarageFinish 1379 non-null object\n", + "GarageCars 
1460 non-null int64\n", + "GarageArea 1460 non-null int64\n", + "GarageQual 1379 non-null object\n", + "GarageCond 1379 non-null object\n", + "PavedDrive 1460 non-null object\n", + "WoodDeckSF 1460 non-null int64\n", + "OpenPorchSF 1460 non-null int64\n", + "EnclosedPorch 1460 non-null int64\n", + "3SsnPorch 1460 non-null int64\n", + "ScreenPorch 1460 non-null int64\n", + "PoolArea 1460 non-null int64\n", + "PoolQC 7 non-null object\n", + "Fence 281 non-null object\n", + "MiscFeature 54 non-null object\n", + "MiscVal 1460 non-null int64\n", + "MoSold 1460 non-null int64\n", + "YrSold 1460 non-null int64\n", + "SaleType 1460 non-null object\n", + "SaleCondition 1460 non-null object\n", + "SalePrice 1460 non-null int64\n", + "dtypes: float64(3), int64(35), object(43)\n", + "memory usage: 924.0+ KB\n" + ] + } + ], + "source": [ + "data.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
IdMSSubClassLotFrontageLotAreaOverallQualOverallCondYearBuiltYearRemodAddMasVnrAreaBsmtFinSF1...WoodDeckSFOpenPorchSFEnclosedPorch3SsnPorchScreenPorchPoolAreaMiscValMoSoldYrSoldSalePrice
count1460.0000001460.0000001201.0000001460.0000001460.0000001460.0000001460.0000001460.0000001452.0000001460.000000...1460.0000001460.0000001460.0000001460.0000001460.0000001460.0000001460.0000001460.0000001460.0000001460.000000
mean730.50000056.89726070.04995810516.8280826.0993155.5753421971.2678081984.865753103.685262443.639726...94.24452146.66027421.9541103.40958915.0609592.75890443.4890416.3219182007.815753180921.195890
std421.61000942.30057124.2847529981.2649321.3829971.11279930.20290420.645407181.066207456.098091...125.33879466.25602861.11914929.31733155.75741540.177307496.1230242.7036261.32809579442.502883
min1.00000020.00000021.0000001300.0000001.0000001.0000001872.0000001950.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000001.0000002006.00000034900.000000
25%365.75000020.00000059.0000007553.5000005.0000005.0000001954.0000001967.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000005.0000002007.000000129975.000000
50%730.50000050.00000069.0000009478.5000006.0000005.0000001973.0000001994.0000000.000000383.500000...0.00000025.0000000.0000000.0000000.0000000.0000000.0000006.0000002008.000000163000.000000
75%1095.25000070.00000080.00000011601.5000007.0000006.0000002000.0000002004.000000166.000000712.250000...168.00000068.0000000.0000000.0000000.0000000.0000000.0000008.0000002009.000000214000.000000
max1460.000000190.000000313.000000215245.00000010.0000009.0000002010.0000002010.0000001600.0000005644.000000...857.000000547.000000552.000000508.000000480.000000738.00000015500.00000012.0000002010.000000755000.000000
\n", + "

8 rows × 38 columns

\n", + "
" + ], + "text/plain": [ + " Id MSSubClass LotFrontage LotArea OverallQual \\\n", + "count 1460.000000 1460.000000 1201.000000 1460.000000 1460.000000 \n", + "mean 730.500000 56.897260 70.049958 10516.828082 6.099315 \n", + "std 421.610009 42.300571 24.284752 9981.264932 1.382997 \n", + "min 1.000000 20.000000 21.000000 1300.000000 1.000000 \n", + "25% 365.750000 20.000000 59.000000 7553.500000 5.000000 \n", + "50% 730.500000 50.000000 69.000000 9478.500000 6.000000 \n", + "75% 1095.250000 70.000000 80.000000 11601.500000 7.000000 \n", + "max 1460.000000 190.000000 313.000000 215245.000000 10.000000 \n", + "\n", + " OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 \\\n", + "count 1460.000000 1460.000000 1460.000000 1452.000000 1460.000000 \n", + "mean 5.575342 1971.267808 1984.865753 103.685262 443.639726 \n", + "std 1.112799 30.202904 20.645407 181.066207 456.098091 \n", + "min 1.000000 1872.000000 1950.000000 0.000000 0.000000 \n", + "25% 5.000000 1954.000000 1967.000000 0.000000 0.000000 \n", + "50% 5.000000 1973.000000 1994.000000 0.000000 383.500000 \n", + "75% 6.000000 2000.000000 2004.000000 166.000000 712.250000 \n", + "max 9.000000 2010.000000 2010.000000 1600.000000 5644.000000 \n", + "\n", + " ... WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch \\\n", + "count ... 1460.000000 1460.000000 1460.000000 1460.000000 \n", + "mean ... 94.244521 46.660274 21.954110 3.409589 \n", + "std ... 125.338794 66.256028 61.119149 29.317331 \n", + "min ... 0.000000 0.000000 0.000000 0.000000 \n", + "25% ... 0.000000 0.000000 0.000000 0.000000 \n", + "50% ... 0.000000 25.000000 0.000000 0.000000 \n", + "75% ... 168.000000 68.000000 0.000000 0.000000 \n", + "max ... 
857.000000 547.000000 552.000000 508.000000 \n", + "\n", + " ScreenPorch PoolArea MiscVal MoSold YrSold \\\n", + "count 1460.000000 1460.000000 1460.000000 1460.000000 1460.000000 \n", + "mean 15.060959 2.758904 43.489041 6.321918 2007.815753 \n", + "std 55.757415 40.177307 496.123024 2.703626 1.328095 \n", + "min 0.000000 0.000000 0.000000 1.000000 2006.000000 \n", + "25% 0.000000 0.000000 0.000000 5.000000 2007.000000 \n", + "50% 0.000000 0.000000 0.000000 6.000000 2008.000000 \n", + "75% 0.000000 0.000000 0.000000 8.000000 2009.000000 \n", + "max 480.000000 738.000000 15500.000000 12.000000 2010.000000 \n", + "\n", + " SalePrice \n", + "count 1460.000000 \n", + "mean 180921.195890 \n", + "std 79442.502883 \n", + "min 34900.000000 \n", + "25% 129975.000000 \n", + "50% 163000.000000 \n", + "75% 214000.000000 \n", + "max 755000.000000 \n", + "\n", + "[8 rows x 38 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Dostęp do danych" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "slideshow": { + "slide_type": "fragment" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index([u'Id', u'MSSubClass', u'MSZoning', u'LotFrontage', u'LotArea',\n", + " u'Street', u'Alley', u'LotShape', u'LandContour', u'Utilities',\n", + " u'LotConfig', u'LandSlope', u'Neighborhood', u'Condition1',\n", + " u'Condition2', u'BldgType', u'HouseStyle', u'OverallQual',\n", + " u'OverallCond', u'YearBuilt', u'YearRemodAdd', u'RoofStyle',\n", + " u'RoofMatl', u'Exterior1st', u'Exterior2nd', u'MasVnrType',\n", + " u'MasVnrArea', u'ExterQual', u'ExterCond', u'Foundation', u'BsmtQual',\n", + " u'BsmtCond', u'BsmtExposure', u'BsmtFinType1', u'BsmtFinSF1',\n", + " u'BsmtFinType2', u'BsmtFinSF2', u'BsmtUnfSF', 
u'TotalBsmtSF',\n", + " u'Heating', u'HeatingQC', u'CentralAir', u'Electrical', u'1stFlrSF',\n", + " u'2ndFlrSF', u'LowQualFinSF', u'GrLivArea', u'BsmtFullBath',\n", + " u'BsmtHalfBath', u'FullBath', u'HalfBath', u'BedroomAbvGr',\n", + " u'KitchenAbvGr', u'KitchenQual', u'TotRmsAbvGrd', u'Functional',\n", + " u'Fireplaces', u'FireplaceQu', u'GarageType', u'GarageYrBlt',\n", + " u'GarageFinish', u'GarageCars', u'GarageArea', u'GarageQual',\n", + " u'GarageCond', u'PavedDrive', u'WoodDeckSF', u'OpenPorchSF',\n", + " u'EnclosedPorch', u'3SsnPorch', u'ScreenPorch', u'PoolArea', u'PoolQC',\n", + " u'Fence', u'MiscFeature', u'MiscVal', u'MoSold', u'YrSold', u'SaleType',\n", + " u'SaleCondition', u'SalePrice'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "print(data.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 60\n", + "1 20\n", + "2 60\n", + "3 70\n", + "4 60\n", + "Name: MSSubClass, dtype: int64\n" + ] + } + ], + "source": [ + "print(data['MSSubClass'].head())" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " MSSubClass SalePrice\n", + "0 60 208500\n", + "1 20 181500\n", + "2 60 223500\n", + "3 70 140000\n", + "4 60 250000\n" + ] + } + ], + "source": [ + "print(data[['MSSubClass', 'SalePrice']].head())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
3470RL60.09550PaveNaNIR1LvlAllPub...0NaNNaNNaN022006WDAbnorml140000
\n", + "

2 rows × 81 columns

\n", + "
" + ], + "text/plain": [ + " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", + "0 1 60 RL 65.0 8450 Pave NaN Reg \n", + "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", + "\n", + " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n", + "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "\n", + " MoSold YrSold SaleType SaleCondition SalePrice \n", + "0 2 2008 WD Normal 208500 \n", + "3 2 2006 WD Abnorml 140000 \n", + "\n", + "[2 rows x 81 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[[0,3]]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
2360RL68.011250PaveNaNIR1LvlAllPub...0NaNNaNNaN092008WDNormal223500
3470RL60.09550PaveNaNIR1LvlAllPub...0NaNNaNNaN022006WDAbnorml140000
4560RL84.014260PaveNaNIR1LvlAllPub...0NaNNaNNaN0122008WDNormal250000
5650RL85.014115PaveNaNIR1LvlAllPub...0NaNMnPrvShed700102009WDNormal143000
\n", + "

6 rows × 81 columns

\n", + "
" + ], + "text/plain": [ + " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", + "0 1 60 RL 65.0 8450 Pave NaN Reg \n", + "1 2 20 RL 80.0 9600 Pave NaN Reg \n", + "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", + "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", + "4 5 60 RL 84.0 14260 Pave NaN IR1 \n", + "5 6 50 RL 85.0 14115 Pave NaN IR1 \n", + "\n", + " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n", + "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "5 Lvl AllPub ... 0 NaN MnPrv Shed 700 \n", + "\n", + " MoSold YrSold SaleType SaleCondition SalePrice \n", + "0 2 2008 WD Normal 208500 \n", + "1 5 2007 WD Normal 181500 \n", + "2 9 2008 WD Normal 223500 \n", + "3 2 2006 WD Abnorml 140000 \n", + "4 12 2008 WD Normal 250000 \n", + "5 10 2009 WD Normal 143000 \n", + "\n", + "[6 rows x 81 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.loc[0:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
2360RL68.011250PaveNaNIR1LvlAllPub...0NaNNaNNaN092008WDNormal223500
3470RL60.09550PaveNaNIR1LvlAllPub...0NaNNaNNaN022006WDAbnorml140000
4560RL84.014260PaveNaNIR1LvlAllPub...0NaNNaNNaN0122008WDNormal250000
\n", + "

5 rows × 81 columns

\n", + "
" + ], + "text/plain": [ + " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", + "0 1 60 RL 65.0 8450 Pave NaN Reg \n", + "1 2 20 RL 80.0 9600 Pave NaN Reg \n", + "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", + "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", + "4 5 60 RL 84.0 14260 Pave NaN IR1 \n", + "\n", + " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n", + "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "2 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "3 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "4 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "\n", + " MoSold YrSold SaleType SaleCondition SalePrice \n", + "0 2 2008 WD Normal 208500 \n", + "1 5 2007 WD Normal 181500 \n", + "2 9 2008 WD Normal 223500 \n", + "3 2 2006 WD Abnorml 140000 \n", + "4 12 2008 WD Normal 250000 \n", + "\n", + "[5 rows x 81 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['MSZoning'] == 'RL'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolAreaPoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePrice
0160RL65.08450PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal208500
1220RL80.09600PaveNaNRegLvlAllPub...0NaNNaNNaN052007WDNormal181500
6720RL75.010084PaveNaNRegLvlAllPub...0NaNNaNNaN082007WDNormal307000
910190RL50.07420PaveNaNRegLvlAllPub...0NaNNaNNaN012008WDNormal118000
101120RL70.011200PaveNaNRegLvlAllPub...0NaNNaNNaN022008WDNormal129500
\n", + "

5 rows × 81 columns

\n", + "
" + ], + "text/plain": [ + " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", + "0 1 60 RL 65.0 8450 Pave NaN Reg \n", + "1 2 20 RL 80.0 9600 Pave NaN Reg \n", + "6 7 20 RL 75.0 10084 Pave NaN Reg \n", + "9 10 190 RL 50.0 7420 Pave NaN Reg \n", + "10 11 20 RL 70.0 11200 Pave NaN Reg \n", + "\n", + " LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal \\\n", + "0 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "1 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "6 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "9 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "10 Lvl AllPub ... 0 NaN NaN NaN 0 \n", + "\n", + " MoSold YrSold SaleType SaleCondition SalePrice \n", + "0 2 2008 WD Normal 208500 \n", + "1 5 2007 WD Normal 181500 \n", + "6 8 2007 WD Normal 307000 \n", + "9 1 2008 WD Normal 118000 \n", + "10 2 2008 WD Normal 129500 \n", + "\n", + "[5 rows x 81 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[(data['MSZoning'] == 'RL') & (data['LotShape'] == 'Reg')].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "ceny = data['SalePrice']" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "180921.19589041095" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ceny.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "755000" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ceny.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'SalePrice'" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"ceny.name" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('Plus vat:', 0 256455.00\n", + "1 223245.00\n", + "2 274905.00\n", + "3 172200.00\n", + "4 307500.00\n", + "5 175890.00\n", + "6 377610.00\n", + "7 246000.00\n", + "8 159777.00\n", + "9 145140.00\n", + "10 159285.00\n", + "11 424350.00\n", + "12 177120.00\n", + "13 343785.00\n", + "14 193110.00\n", + "15 162360.00\n", + "16 183270.00\n", + "17 110700.00\n", + "18 195570.00\n", + "19 170970.00\n", + "20 400119.00\n", + "21 171462.00\n", + "22 282900.00\n", + "23 159777.00\n", + "24 189420.00\n", + "25 315249.00\n", + "26 165804.00\n", + "27 376380.00\n", + "28 255225.00\n", + "29 84255.00\n", + " ... \n", + "1430 236332.20\n", + "1431 176812.50\n", + "1432 79335.00\n", + "1433 229395.00\n", + "1434 196800.00\n", + "1435 214020.00\n", + "1436 148215.00\n", + "1437 485378.91\n", + "1438 184131.00\n", + "1439 242310.00\n", + "1440 234930.00\n", + "1441 183639.00\n", + "1442 381300.00\n", + "1443 148830.00\n", + "1444 220908.00\n", + "1445 158670.00\n", + "1446 194217.00\n", + "1447 295200.00\n", + "1448 137760.00\n", + "1449 113160.00\n", + "1450 167280.00\n", + "1451 353120.70\n", + "1452 178350.00\n", + "1453 103935.00\n", + "1454 227550.00\n", + "1455 215250.00\n", + "1456 258300.00\n", + "1457 327795.00\n", + "1458 174813.75\n", + "1459 181425.00\n", + "Name: SalePrice, Length: 1460, dtype: float64)\n" + ] + } + ], + "source": [ + "print(\"Plus vat:\", ceny * 1.23)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['RL', 'RM', 'C (all)', 'FV', 'RH'], dtype=object)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.MSZoning.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "RL 1151\n", + "RM 218\n", + "FV 65\n", + "RH 16\n", + "C (all) 10\n", + "Name: MSZoning, dtype: int64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.MSZoning.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "data['nowa'] = ceny * 1.23" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MSSubClassMSZoningLotFrontageStreetAlleyLotShapeLandContourUtilitiesLotConfigLandSlope...PoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePricenowa
060RL65.0PaveNaNRegLvlAllPubInsideGtl...NaNNaNNaN022008WDNormal208500256455.0
120RL80.0PaveNaNRegLvlAllPubFR2Gtl...NaNNaNNaN052007WDNormal181500223245.0
260RL68.0PaveNaNIR1LvlAllPubInsideGtl...NaNNaNNaN092008WDNormal223500274905.0
370RL60.0PaveNaNIR1LvlAllPubCornerGtl...NaNNaNNaN022006WDAbnorml140000172200.0
460RL84.0PaveNaNIR1LvlAllPubFR2Gtl...NaNNaNNaN0122008WDNormal250000307500.0
\n", + "

5 rows × 80 columns

\n", + "
" + ], + "text/plain": [ + " MSSubClass MSZoning LotFrontage Street Alley LotShape LandContour \\\n", + "0 60 RL 65.0 Pave NaN Reg Lvl \n", + "1 20 RL 80.0 Pave NaN Reg Lvl \n", + "2 60 RL 68.0 Pave NaN IR1 Lvl \n", + "3 70 RL 60.0 Pave NaN IR1 Lvl \n", + "4 60 RL 84.0 Pave NaN IR1 Lvl \n", + "\n", + " Utilities LotConfig LandSlope ... PoolQC Fence MiscFeature MiscVal \\\n", + "0 AllPub Inside Gtl ... NaN NaN NaN 0 \n", + "1 AllPub FR2 Gtl ... NaN NaN NaN 0 \n", + "2 AllPub Inside Gtl ... NaN NaN NaN 0 \n", + "3 AllPub Corner Gtl ... NaN NaN NaN 0 \n", + "4 AllPub FR2 Gtl ... NaN NaN NaN 0 \n", + "\n", + " MoSold YrSold SaleType SaleCondition SalePrice nowa \n", + "0 2 2008 WD Normal 208500 256455.0 \n", + "1 5 2007 WD Normal 181500 223245.0 \n", + "2 9 2008 WD Normal 223500 274905.0 \n", + "3 2 2006 WD Abnorml 140000 172200.0 \n", + "4 12 2008 WD Normal 250000 307500.0 \n", + "\n", + "[5 rows x 80 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.drop('LotArea', axis=1)\n", + "data.drop(['Id', 'LotArea'], axis=1).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
IdMSSubClassMSZoningLotFrontageLotAreaStreetAlleyLotShapeLandContourUtilities...PoolQCFenceMiscFeatureMiscValMoSoldYrSoldSaleTypeSaleConditionSalePricenowa
1220RL80.09600PaveNaNRegLvlAllPub...NaNNaNNaN052007WDNormal181500223245.0
2360RL68.011250PaveNaNIR1LvlAllPub...NaNNaNNaN092008WDNormal223500274905.0
3470RL60.09550PaveNaNIR1LvlAllPub...NaNNaNNaN022006WDAbnorml140000172200.0
4560RL84.014260PaveNaNIR1LvlAllPub...NaNNaNNaN0122008WDNormal250000307500.0
5650RL85.014115PaveNaNIR1LvlAllPub...NaNMnPrvShed700102009WDNormal143000175890.0
\n", + "

5 rows × 82 columns

\n", + "
" + ], + "text/plain": [ + " Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n", + "1 2 20 RL 80.0 9600 Pave NaN Reg \n", + "2 3 60 RL 68.0 11250 Pave NaN IR1 \n", + "3 4 70 RL 60.0 9550 Pave NaN IR1 \n", + "4 5 60 RL 84.0 14260 Pave NaN IR1 \n", + "5 6 50 RL 85.0 14115 Pave NaN IR1 \n", + "\n", + " LandContour Utilities ... PoolQC Fence MiscFeature MiscVal MoSold \\\n", + "1 Lvl AllPub ... NaN NaN NaN 0 5 \n", + "2 Lvl AllPub ... NaN NaN NaN 0 9 \n", + "3 Lvl AllPub ... NaN NaN NaN 0 2 \n", + "4 Lvl AllPub ... NaN NaN NaN 0 12 \n", + "5 Lvl AllPub ... NaN MnPrv Shed 700 10 \n", + "\n", + " YrSold SaleType SaleCondition SalePrice nowa \n", + "1 2007 WD Normal 181500 223245.0 \n", + "2 2008 WD Normal 223500 274905.0 \n", + "3 2006 WD Abnorml 140000 172200.0 \n", + "4 2008 WD Normal 250000 307500.0 \n", + "5 2009 WD Normal 143000 175890.0 \n", + "\n", + "[5 rows x 82 columns]" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.drop(0).head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/labs06/README.md b/labs06/README.md new file mode 100644 index 0000000..e35b68f --- /dev/null +++ b/labs06/README.md @@ -0,0 +1,18 @@ +## Zadania + +** zad. 0 ** +Sprawdź, czy masz zainstalowany pakiet ``pandas``. Jeżeli nie, zainstaluj go. + +** zad. 2 (domowe) ** +Jest to zadanie złożone, składające się z kilku części. 
Całość będzie opierać się o dane zawarte w pliku *mieszkania.csv*, które dotyczą cen mieszkań w Poznaniu kilka lat temu. + 1. Otwórz plik ``task02.py``, który zawiera szkielet kodu, który będziemy rozwijać w tym zadaniu. + 1. Napisz funkcję, która wczyta zestaw danych z pliku *mieszkania.csv* i zwróci obiekt typu *DataFrame*. Jeżeli wszystko zostało zrobione poprawnie, powinno się wyświetlić 5 pierwszych wierszy. + 1. Uzupełnij funkcję ``most_common_room_number``, która zwróci, jaka jest najpopularniejsza liczba pokoi w ogłoszeniach. Funkcja powinna zwrócić liczbę całkowitą. + 1. Uzupełnij kod w funkcji ``cheapest_flats(dane, n)``, która zwróci *n* najtańszych ofert mieszkań. Zwrócony obiekt ma być typu ``DataFrame``. + 1. Napisz funkcję ``find_borough(desc)``, która przyjmuje 1 argument typu *string* i zwróci jedną z dzielnic zdefiniowaną w liście ``dzielnice``. Funkcja ma zwrócić pierwszą (względem kolejności) nazwę dzielnicy, która jest zawarta w ``desc``. Jeżeli żadna nazwa nie została odnaleziona, zwróć *Inne*. + 1. Dodaj kolumnę ``Borough``, która będzie zawierać informacje o dzielnicach i powstanie z kolumny ``Localization``. Wykorzystaj do tego funkcję ``find_borough``. + 1. Uzupełnij funkcję ``write_plot``, która zapisze do pliku ``filename`` wykres słupkowy przedstawiający liczbę ogłoszeń mieszkań z podziałem na dzielnice. + 1. Napisz funkcję ``mean_price``, która zwróci średnią cenę mieszkania ``room_numer``-pokojowego. + 1. Uzupełnij funkcję ``find_13``, która zwróci listę dzielnic, które zawierają ofertę mieszkania na 13 piętrze. + 1. Napisz funkcję ``find_best_flats``, która zwróci wszystkie ogłoszenia mieszkań, które znajdują się na Winogradach, mają 3 pokoje i są położone na 1 piętrze. + 1. *(dodatkowe)*: Korzystając z pakietu *sklearn* zbuduj model regresji liniowej, który będzie wyznaczać cenę mieszkania na podstawie wielkości mieszkania i liczby pokoi. 
diff --git a/labs06/tasks.py b/labs06/tasks.py new file mode 100755 index 0000000..0d38505 --- /dev/null +++ b/labs06/tasks.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +1. Zaimportuj bibliotekę pandas jako pd. +""" + + +""" +2. Wczytaj zbiór danych `bikes.csv` do zmiennej data. +""" + + +""" +3. Wyświetl 5 pierwszych wierszy z data. +""" + + +""" +4. Wyświetl nazwy kolumn. +""" + + +""" +5. Wyświetl ile nasz zbiór danych ma kolumn i wierszy. +""" + + +""" +6. Wyświetl kolumnę 'City' z powyższego zbioru danych. +""" + + +""" +7. Wyświetl jakie wartości przyjmuje kolumna 'City'. +""" + +""" +8. Wyświetl tabelę rozstawną kolumny City. +""" + + +""" +9. Wyświetl tylko pierwsze 4 wiersze z wcześniejszego polecenia. +""" + + +""" +10. Wyświetl, w ilu przypadkach kolumna City zawiera NaN. +""" + + + +""" +11. Wyświetl data.info() +""" + +""" +12. Wyświetl tylko kolumny Borough i Agency i tylko 5 ostatnich linii. +""" + + +""" +13. Wyświetl tylko te dane, dla których wartość z kolumny Agency jest równa +NYPD. Zlicz ile jest takich przykładów. +""" + +""" +14. Wyświetl wartość minimalną i maksymalną z kolumny Longitude. +""" + +""" +15. Dodaj kolumnę diff, która powstanie przez sumowanie kolumn Longitude i Latitude. +""" + + +""" +16. Wyświetl tabelę rozstawną dla kolumny 'Descriptor', dla której Agency jest +równe NYPD. 
+""" diff --git a/labs07/gapminder.csv b/labs07/gapminder.csv new file mode 100644 index 0000000..534a004 --- /dev/null +++ b/labs07/gapminder.csv @@ -0,0 +1,177 @@ +,female_BMI,male_BMI,gdp,population,under5mortality,life_expectancy,fertility +Afghanistan,21.07402,20.62058,1311.0,26528741.0,110.4,52.8,6.2 +Albania,25.65726,26.44657,8644.0,2968026.0,17.9,76.8,1.76 +Algeria,26.368409999999997,24.5962,12314.0,34811059.0,29.5,75.5,2.73 +Angola,23.48431,22.25083,7103.0,19842251.0,192.0,56.7,6.43 +Antigua and Barbuda,27.50545,25.76602,25736.0,85350.0,10.9,75.5,2.16 +Argentina,27.46523,27.5017,14646.0,40381860.0,15.4,75.4,2.24 +Armenia,27.1342,25.355420000000002,7383.0,2975029.0,20.0,72.3,1.4 +Australia,26.87777,27.56373,41312.0,21370348.0,5.2,81.6,1.96 +Austria,25.09414,26.467409999999997,43952.0,8331465.0,4.6,80.4,1.41 +Azerbaijan,27.50879,25.65117,14365.0,8868713.0,43.3,69.2,1.99 +Bahamas,29.13948,27.24594,24373.0,348587.0,14.5,72.2,1.89 +Bahrain,28.790940000000003,27.83721,42507.0,1115777.0,9.4,77.6,2.23 +Bangladesh,20.54531,20.39742,2265.0,148252473.0,55.9,68.3,2.38 +Barbados,29.221690000000002,26.384390000000003,16075.0,277315.0,15.4,75.3,1.83 +Belarus,26.641859999999998,26.16443,14488.0,9526453.0,7.2,70.0,1.42 +Belgium,25.1446,26.75915,41641.0,10779155.0,4.7,79.6,1.82 +Belize,29.81663,27.02255,8293.0,306165.0,20.1,70.7,2.91 +Benin,23.74026,22.41835,1646.0,8973525.0,116.3,59.7,5.27 +Bhutan,22.88243,22.8218,5663.0,694990.0,48.1,70.7,2.51 +Bolivia,26.8633,24.43335,5066.0,9599916.0,52.0,71.2,3.48 +Bosnia and Herzegovina,26.35874,26.611629999999998,9316.0,3839749.0,8.1,77.5,1.22 +Botswana,26.09156,22.129839999999998,13858.0,1967866.0,63.8,53.2,2.86 +Brazil,25.99113,25.78623,13906.0,194769696.0,18.6,73.2,1.9 +Brunei,22.892310000000002,24.18179,72351.0,380786.0,9.0,76.9,2.1 +Bulgaria,25.51574,26.542859999999997,15368.0,7513646.0,13.7,73.2,1.43 +Burkina Faso,21.63031,21.27157,1358.0,14709011.0,130.4,58.0,6.04 +Burundi,21.27927,21.50291,723.0,8821795.0,108.6,59.1,6.48 
+Cambodia,21.69608,20.80496,2442.0,13933660.0,51.5,66.1,3.05 +Cameroon,24.9527,23.681729999999998,2571.0,19570418.0,113.8,56.6,5.17 +Canada,26.698290000000004,27.4521,41468.0,33363256.0,5.8,80.8,1.68 +Cape Verde,24.96136,23.515220000000003,6031.0,483824.0,28.4,70.4,2.57 +Chad,21.95424,21.485689999999998,1753.0,11139740.0,168.0,54.3,6.81 +Chile,27.92807,27.015420000000002,18698.0,16645940.0,8.9,78.5,1.89 +China,22.91041,22.92176,7880.0,1326690636.0,18.5,73.4,1.53 +Colombia,26.22529,24.94041,10489.0,44901660.0,19.7,76.2,2.43 +Comoros,22.444329999999997,22.06131,1440.0,665414.0,91.2,67.1,5.05 +"Congo, Dem. Rep.",21.6677,19.86692,607.0,61809278.0,124.5,57.5,6.45 +"Congo, Rep.",23.10824,21.87134,5022.0,3832771.0,72.6,58.8,5.1 +Costa Rica,27.03497,26.47897,12219.0,4429506.0,10.3,79.8,1.91 +Cote d'Ivoire,23.82088,22.56469,2854.0,19261647.0,116.9,55.4,4.91 +Croatia,25.17882,26.596290000000003,21873.0,4344151.0,5.9,76.2,1.43 +Cuba,26.576140000000002,25.06867,17765.0,11290239.0,6.3,77.6,1.5 +Cyprus,25.92587,27.41899,35828.0,1077010.0,4.2,80.0,1.49 +Denmark,25.106270000000002,26.13287,45017.0,5495302.0,4.3,78.9,1.89 +Djibouti,24.38177,23.38403,2502.0,809639.0,81.0,61.8,3.76 +Ecuador,27.062690000000003,25.58841,9244.0,14447600.0,26.8,74.7,2.73 +Egypt,30.099970000000003,26.732429999999997,9974.0,78976122.0,31.4,70.2,2.95 +El Salvador,27.84092,26.36751,7450.0,6004199.0,21.6,73.7,2.32 +Equatorial Guinea,24.528370000000002,23.7664,40143.0,686223.0,118.4,57.5,5.31 +Eritrea,21.082320000000003,20.885089999999998,1088.0,4500638.0,60.4,60.1,5.16 +Estonia,25.185979999999997,26.264459999999996,24743.0,1339941.0,5.5,74.2,1.62 +Ethiopia,20.71463,20.247,931.0,83079608.0,86.9,60.0,5.19 +Fiji,29.339409999999997,26.53078,7129.0,843206.0,24.0,64.9,2.74 +Finland,25.58418,26.733390000000004,42122.0,5314170.0,3.3,79.6,1.85 +France,24.82949,25.853289999999998,37505.0,62309529.0,4.3,81.1,1.97 +Gabon,25.95121,24.0762,15800.0,1473741.0,68.0,61.7,4.28 
+Gambia,24.82101,21.65029,1566.0,1586749.0,87.4,65.7,5.8 +Georgia,26.45014,25.54942,5900.0,4343290.0,19.3,71.8,1.79 +Germany,25.73903,27.165090000000003,41199.0,80665906.0,4.4,80.0,1.37 +Ghana,24.33014,22.842470000000002,2907.0,23115919.0,79.9,62.0,4.19 +Greece,24.92026,26.33786,32197.0,11161755.0,4.9,80.2,1.46 +Grenada,27.31948,25.179879999999997,12116.0,103934.0,13.5,70.8,2.28 +Guatemala,26.84324,25.29947,6960.0,14106687.0,36.9,71.2,4.12 +Guinea,22.45206,22.52449,1230.0,10427356.0,121.0,57.1,5.34 +Guinea-Bissau,22.92809,21.64338,1326.0,1561293.0,127.6,53.6,5.25 +Guyana,26.470190000000002,23.68465,5208.0,748096.0,41.9,65.0,2.74 +Haiti,23.27785,23.66302,1600.0,9705130.0,83.3,61.0,3.5 +Honduras,26.73191,25.10872,4391.0,7259470.0,26.5,71.8,3.27 +"Hong Kong, China",23.71046,25.057470000000002,46635.0,6910384.0,3.06,82.49,1.04 +Hungary,25.97839,27.115679999999998,23334.0,10050699.0,7.2,73.9,1.33 +Iceland,26.02599,27.206870000000002,42294.0,310033.0,2.7,82.4,2.12 +India,21.31478,20.95956,3901.0,1197070109.0,65.6,64.7,2.64 +Indonesia,22.986929999999997,21.85576,7856.0,235360765.0,36.2,69.4,2.48 +Iran,27.236079999999998,25.310029999999998,15955.0,72530693.0,21.4,73.1,1.88 +Iraq,28.411170000000002,26.71017,11616.0,29163327.0,38.3,66.6,4.34 +Ireland,26.62176,27.65325,47713.0,4480145.0,4.5,80.1,2.0 +Israel,27.301920000000003,27.13151,28562.0,7093808.0,4.9,80.6,2.92 +Italy,24.79289,26.4802,37475.0,59319234.0,4.1,81.5,1.39 +Jamaica,27.22601,24.00421,8951.0,2717344.0,18.9,75.1,2.39 +Japan,21.87088,23.50004,34800.0,127317900.0,3.4,82.5,1.34 +Jordan,29.218009999999996,27.47362,10897.0,6010035.0,22.1,76.9,3.59 +Kazakhstan,26.65065,26.290779999999998,18797.0,15915966.0,25.9,67.1,2.51 +Kenya,23.06181,21.592579999999998,2358.0,38244442.0,71.0,60.8,4.76 +Kiribati,31.30769,29.2384,1803.0,98437.0,64.5,61.5,3.13 +Kuwait,31.161859999999997,29.172109999999996,91966.0,2705290.0,11.3,77.3,2.68 +Latvia,25.615129999999997,26.45693,20977.0,2144215.0,10.5,72.4,1.5 
+Lebanon,27.70471,27.20117,14158.0,4109389.0,11.3,77.8,1.57 +Lesotho,26.780520000000003,21.90157,2041.0,1972194.0,114.2,44.5,3.34 +Liberia,23.21679,21.89537,588.0,3672782.0,100.9,59.9,5.19 +Libya,29.19874,26.54164,29853.0,6123022.0,18.8,75.6,2.64 +Lithuania,26.01424,26.86102,23223.0,3219802.0,8.2,72.1,1.42 +Luxembourg,26.09326,27.434040000000003,95001.0,485079.0,2.8,81.0,1.63 +"Macao, China",24.895039999999998,25.713820000000002,80191.0,507274.0,6.72,79.32,0.94 +"Macedonia, FYR",25.37646,26.34473,10872.0,2055266.0,11.8,74.5,1.47 +Madagascar,20.73501,21.403470000000002,1528.0,19926798.0,66.7,62.2,4.79 +Malawi,22.91455,22.034679999999998,674.0,13904671.0,101.1,52.4,5.78 +Malaysia,25.448320000000002,24.73069,19968.0,27197419.0,8.0,74.5,2.05 +Maldives,26.4132,23.219910000000002,12029.0,321026.0,16.0,78.5,2.38 +Mali,23.07655,21.78881,1602.0,14223403.0,148.3,58.5,6.82 +Malta,27.04993,27.683609999999998,27872.0,406392.0,6.6,80.7,1.38 +Mauritania,26.26476,22.62295,3356.0,3414552.0,103.0,67.9,4.94 +Mauritius,26.09824,25.15669,14615.0,1238013.0,15.8,72.9,1.58 +Mexico,28.737509999999997,27.42468,15826.0,114972821.0,17.9,75.4,2.35 +"Micronesia, Fed. 
Sts.",31.28402,28.10315,3197.0,104472.0,43.1,68.0,3.59 +Moldova,27.05617,24.2369,3890.0,4111168.0,17.6,70.4,1.49 +Mongolia,25.71375,24.88385,7563.0,2629666.0,34.8,64.8,2.37 +Montenegro,25.70186,26.55412,14183.0,619740.0,8.1,76.0,1.72 +Morocco,26.223090000000003,25.63182,6091.0,31350544.0,35.8,73.3,2.44 +Mozambique,23.317339999999998,21.93536,864.0,22994867.0,114.4,54.0,5.54 +Myanmar,22.47733,21.44932,2891.0,51030006.0,87.2,59.4,2.05 +Namibia,25.14988,22.65008,8169.0,2115703.0,62.2,59.1,3.36 +Nepal,20.72814,20.76344,1866.0,26325183.0,50.7,68.4,2.9 +Netherlands,25.47269,26.01541,47388.0,16519862.0,4.8,80.3,1.77 +New Zealand,27.36642,27.768929999999997,32122.0,4285380.0,6.4,80.3,2.12 +Nicaragua,27.57259,25.77291,4060.0,5594524.0,28.1,77.0,2.72 +Niger,21.95958,21.21958,843.0,15085130.0,141.3,58.0,7.59 +Nigeria,23.674020000000002,23.03322,4684.0,151115683.0,140.9,59.2,6.02 +Norway,25.73772,26.934240000000003,65216.0,4771633.0,3.6,80.8,1.96 +Oman,26.66535,26.241090000000003,47799.0,2652281.0,11.9,76.2,2.89 +Pakistan,23.44986,22.299139999999998,4187.0,163096985.0,95.5,64.1,3.58 +Panama,27.67758,26.26959,14033.0,3498679.0,21.0,77.3,2.61 +Papua New Guinea,25.77189,25.015060000000002,1982.0,6540267.0,69.7,58.6,4.07 +Paraguay,25.90523,25.54223,6684.0,6047131.0,25.7,74.0,3.06 +Peru,25.98511,24.770410000000002,9249.0,28642048.0,23.2,78.2,2.58 +Philippines,23.4671,22.872629999999997,5332.0,90297115.0,33.4,69.8,3.26 +Poland,25.918870000000002,26.6738,19996.0,38525752.0,6.7,75.4,1.33 +Portugal,26.183020000000003,26.68445,27747.0,10577458.0,4.1,79.4,1.36 +Puerto Rico,30.2212,28.378040000000002,35855.0,3728126.0,8.78,77.0,1.69 +Qatar,28.912509999999997,28.13138,126076.0,1388962.0,9.5,77.9,2.2 +Romania,25.22425,25.41069,18032.0,20741669.0,16.1,73.2,1.34 +Russia,27.21272,26.01131,22506.0,143123163.0,13.5,67.9,1.49 +Rwanda,22.07156,22.55453,1173.0,9750314.0,78.3,64.1,5.06 +Samoa,33.659079999999996,30.42475,5731.0,183440.0,18.8,72.3,4.43 +Sao Tome and 
Principe,24.88216,23.51233,2673.0,163595.0,61.0,66.0,4.41 +Saudi Arabia,29.598779999999998,27.884320000000002,44189.0,26742842.0,18.1,78.3,2.97 +Senegal,24.30968,21.927429999999998,2162.0,12229703.0,75.8,63.5,5.11 +Serbia,25.669970000000003,26.51495,12522.0,9109535.0,8.0,74.3,1.41 +Seychelles,27.973740000000003,25.56236,20065.0,91634.0,14.2,72.9,2.28 +Sierra Leone,23.93364,22.53139,1289.0,5521838.0,179.1,53.6,5.13 +Singapore,22.86642,23.83996,65991.0,4849641.0,2.8,80.6,1.28 +Slovak Republic,26.323729999999998,26.92717,24670.0,5396710.0,8.8,74.9,1.31 +Slovenia,26.582140000000003,27.43983,30816.0,2030599.0,3.7,78.7,1.43 +Solomon Islands,28.8762,27.159879999999998,1835.0,503410.0,33.1,62.3,4.36 +Somalia,22.66607,21.969170000000002,615.0,9132589.0,168.5,52.6,7.06 +South Africa,29.4803,26.85538,12263.0,50348811.0,66.1,53.4,2.54 +Spain,26.30554,27.49975,34676.0,45817016.0,5.0,81.1,1.42 +Sri Lanka,23.11717,21.96671,6907.0,19949553.0,11.7,74.0,2.32 +Sudan,23.16132,22.40484,3246.0,34470138.0,84.7,65.5,4.79 +Suriname,27.749859999999998,25.49887,13470.0,506657.0,26.4,70.2,2.41 +Swaziland,28.448859999999996,23.16969,5887.0,1153750.0,112.2,45.1,3.7 +Sweden,25.1466,26.37629,43421.0,9226333.0,3.2,81.1,1.92 +Switzerland,24.07242,26.20195,55020.0,7646542.0,4.7,82.0,1.47 +Syria,28.87418,26.919690000000003,6246.0,20097057.0,16.5,76.1,3.17 +Tajikistan,23.84799,23.77966,2001.0,7254072.0,56.2,69.6,3.7 +Tanzania,23.0843,22.47792,2030.0,42844744.0,72.4,60.4,5.54 +Thailand,24.38577,23.008029999999998,12216.0,66453255.0,15.6,73.9,1.48 +Timor-Leste,21.50694,20.59082,1486.0,1030915.0,70.2,69.9,6.48 +Togo,22.73858,21.87875,1219.0,6052937.0,96.4,57.5,4.88 +Tonga,34.25969,30.99563,4748.0,102816.0,17.0,70.3,4.01 +Trinidad and Tobago,28.27587,26.396690000000003,30875.0,1315372.0,24.9,71.7,1.8 +Tunisia,27.93706,25.15699,9938.0,10408091.0,19.4,76.8,2.04 +Turkey,28.247490000000003,26.703709999999997,16454.0,70344357.0,22.2,77.8,2.15 +Turkmenistan,24.66154,25.24796,8877.0,4917541.0,63.9,67.2,2.48 
+Uganda,22.48126,22.35833,1437.0,31014427.0,89.3,56.0,6.34 +Ukraine,26.23317,25.42379,8762.0,46028476.0,12.9,67.8,1.38 +United Arab Emirates,29.614009999999997,28.053590000000003,73029.0,6900142.0,9.1,75.6,1.95 +United Kingdom,26.944490000000002,27.392490000000002,37739.0,61689620.0,5.6,79.7,1.87 +United States,28.343590000000003,28.456979999999998,50384.0,304473143.0,7.7,78.3,2.07 +Uruguay,26.593040000000002,26.39123,15317.0,3350832.0,13.0,76.0,2.11 +Uzbekistan,25.43432,25.32054,3733.0,26952719.0,49.2,69.6,2.46 +Vanuatu,28.458759999999998,26.78926,2944.0,225335.0,28.2,63.4,3.61 +Venezuela,28.134079999999997,27.445,17911.0,28116716.0,17.1,74.2,2.53 +Vietnam,21.065,20.9163,4085.0,86589342.0,26.2,74.1,1.86 +West Bank and Gaza,29.026429999999998,26.5775,3564.0,3854667.0,24.7,74.1,4.38 +Zambia,23.05436,20.68321,3039.0,13114579.0,94.9,51.1,5.88 +Zimbabwe,24.645220000000002,22.0266,1286.0,13495462.0,98.3,47.3,3.85 diff --git a/labs07/sklearn.ipynb b/labs07/sklearn.ipynb new file mode 100644 index 0000000..c21dcbb --- /dev/null +++ b/labs07/sklearn.ipynb @@ -0,0 +1,485 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "# Analiza danych w Pythonie: sklearn\n", + "\n", + "### Tomasz Dwojak\n", + "\n", + "### 3 czerwca 2018" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + " * Pierwsza część: pandas\n", + " * Druga część: sklearn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Przypomnienie z UMZ\n", + " * przygotowanie i czyszczenie danych\n", + " * wybór i trening modelu\n", + " * tuning\n", + " * ewaluacja" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "import sklearn\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "data = pd.read_csv(\"./gapminder.csv\", index_col=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
female_BMImale_BMIgdppopulationunder5mortalitylife_expectancyfertility
Afghanistan21.0740220.620581311.026528741.0110.452.86.20
Albania25.6572626.446578644.02968026.017.976.81.76
Algeria26.3684124.5962012314.034811059.029.575.52.73
Angola23.4843122.250837103.019842251.0192.056.76.43
Antigua and Barbuda27.5054525.7660225736.085350.010.975.52.16
\n", + "
" + ], + "text/plain": [ + " female_BMI male_BMI gdp population \\\n", + "Afghanistan 21.07402 20.62058 1311.0 26528741.0 \n", + "Albania 25.65726 26.44657 8644.0 2968026.0 \n", + "Algeria 26.36841 24.59620 12314.0 34811059.0 \n", + "Angola 23.48431 22.25083 7103.0 19842251.0 \n", + "Antigua and Barbuda 27.50545 25.76602 25736.0 85350.0 \n", + "\n", + " under5mortality life_expectancy fertility \n", + "Afghanistan 110.4 52.8 6.20 \n", + "Albania 17.9 76.8 1.76 \n", + "Algeria 29.5 75.5 2.73 \n", + "Angola 192.0 56.7 6.43 \n", + "Antigua and Barbuda 10.9 75.5 2.16 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "y = data['life_expectancy']\n", + "X = data.drop('life_expectancy', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "train_X, test_X, train_y, test_y = \\\n", + " train_test_split(X, y, test_size=0.2, random_state=123, shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "model = LinearRegression()\n", + "model.fit(X,y)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([67.56279809, 76.25840076, 50.21126326, 59.21303855, 72.06348723])" 
+ ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predicted = model.predict(test_X)\n", + "predicted[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "RMSE: 3.5179543848147863\n" + ] + } + ], + "source": [ + "from sklearn.metrics import mean_squared_error\n", + "rmse = np.sqrt(mean_squared_error(predicted, test_y))\n", + "print(\"RMSE:\", rmse)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.795295000468209" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " r2 = model.score(test_X, test_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "#### API\n", + " * model\n", + " * `fit`\n", + " * `predict`" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "female_BMI: -1.18\n", + "male_BMI: 1.46\n", + "gdp: 5.11e-05\n", + "population: 7.21e-10\n", + "under5mortality: -0.159\n", + "fertility: 0.421\n" + ] + } + ], + "source": [ + "for p in zip(train_X.columns, model.coef_):\n", + " print(\"{}: {:.3}\".format(p[0], p[1]))" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/lib/python3.6/site-packages/ipykernel_launcher.py:2: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) 
instead\n", + " \n" + ] + }, + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model2 = LinearRegression()\n", + "model2.fit(train_X['male_BMI'].reshape(-1, 1), train_y)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5852413468462743" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model2.intercept_" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/lib/python3.6/site-packages/ipykernel_launcher.py:5: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) 
instead\n", + " \"\"\"\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xt8VPWZ+PHPMyGxAZQ73hOsom6VQhVv21arWF4KrkVpXWyw/LCVtfqr4OXX2k1XUUtrtStod7Wya2teJLogQmm7oAXUpVsrysUAai1qCa0XkABBJAJJnt8fc2EmmcuZmTNz5px53r58kXzn9j0zmed8z3Oe7/eIqmKMMcb/Ql53wBhjjDssoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLCAroxxgSEBXRjjAkIC+jGGBMQFtCNMSYgehXzxQYPHqzDhg0r5ksaY4zvrV27doeqDsl0v6IG9GHDhrFmzZpivqQxxvieiLQ4uZ+lXIwxJiAsoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLCAroxxgSEBXRjjAkIC+jGGFMgqsodd9zB8uXLi/J6RZ1YZIwx5aCjo4OpU6fS2NgIgIjQ1dVV8Ne1Eboxxrikvb2dsWPHUllZGQvm55xzDm1tbUV5fRuhG2NMntra2rjgggtobm6OtV1++eUsWLCAww47rGj9sBG6Mcbk6IMPPuCYY46hf//+sWB+3XXX0dHRwZIlS4oazMECujHGZO3tt9+mV69eHH300bz//vsA1NfX09XVxdy5c6moqPCkX5ZyMcYYh5qbmxk1alRC25w5c5g+fbpHPUpkAd0YYzJYtWoVF1xwQUJbY2MjdXV1HvUoOQvoxhiTwq9+9SuuuOKKhLalS5dy6aWXetSj9CyHbowx3Tz22GOISEIwf/HFF1HVkg3mYAHdGGNi7r33XkSEb33rW7G21157DVXlvPPO87BnzljKxRhT1lSVW2+9ldmzZ8faDj/8cDZt2kRNTY2HPcueBXRjTFnq6OhgypQpPPHEE7G2E088kZdeeonBgwd72LPcWUA3xpSV9vZ2Lr/8clasWBFrO++88/jd735H3759PexZ/iygG2PKwu7duzn//PPZuHFjrG3ChAnMnz+fqqoqD3vmHjspaowJtPfff5+jjjqKAQMGxIL5tGnT6OzsZPHixYEJ5mAB3RgTUG+99RahUIhjjjmGbdu2AfAv//IvdHV18eijjxIKBS/8BW+LjAGaNjYxbM4wQneFGDZnGE0bm7zuUsGU07Y6sX79ekSE4cOHo6oAPPTQQ6gqd999NyLicQ8Lx3LoJnCaNjYx7TfT2HdwHwAtbS1M+800AOpGlNZU7XyV07Zm8sILL3DhhRcmtD3xxBNcffXVHvWo+GyEbgKnfmV9LMBF7Tu4j/qV9UV5/WKOmL3e1lKwePFiRCQhmD/zzDOoalkFc7ARugmgrW1bs2p3U7FHzF5uq9f+4z/+g2nTpiW0/fGPf+Tcc8/1qEfesxG6CZyafsln96Vqd5OTEbObI3in2xqkPPuPfvQjRCQhmL/++uuoalkHc7CAbgJo1phZ9K7sndDWu7I3s8bMKvhrZxoxR0fwLW0tKBobwecaYJ1sq9uv6QVVZcaMGYgI9fXhnWO/fv3YunUrqsrf/d3fedzD0mAB3QRO3Yg65v7DXGr71SIItf1qmfsPc11JeWQa6WYaMbud83ayrdOXTfdtnr2jo4Orr76aUCjEgw8+CMDw4cPZsWMHu3fv5vjjj/e4h6VFomU9xTB69Ghds2ZN0V7PGDd1z49DeDQcH0Az3Sd0Vwil53dOELru7HK9v9OXTae1vTXp7YV4Tbe0t7dz2WWX8dxzz8XaPv/5z/PMM8/4fnp+LkRkraqOznQ/G6Eb45CT0XWmEXOx8vtNG5u4dsm1K
YM5QEhCJZdT3717NyNGjKB3796xYH7FFVewf/9+/vd//7csg3k2LKAb45DTipK6EXVsmbGFrju72DJjS0L6w838frr0z/Rl0znQeSDt4zu1s2Ry6u+99x5Dhw5lwIABbNq0CYDrr7+ezs5OFi1aFKjp+YVkAd0Yh1KNorMZ6daNqGPKyClUSPiq8BVSwZSRU7LO72c60ZluZJ7MvoP7mLJ4StGD+ubNmxERjj32WD788EMA7rjjDrq6unjkkUcCOT2/kBy9WyJys4i8JiKbRORJEfmUiJwgIqtF5C0RmS8itgs1gZZsdA3ZjXSbNjbR0NxAp3bGHtvQ3JB1IE2V/pm+LPerz3dqZ9FG6uvWrUNEOPnkk2Nt//Zv/4aqctdddwV6en4hZQzoInIscBMwWlVPByqAScBPgNmqehKwC/hmITtqjNe658ejo+x4mapH3KpySZX+aW1vpWljE4OqB2X1fPn0JRvPP/88IsKZZ54Za3vyySdRVW688caCvW65cHo80wuoFpFeQG/gfeAiYGHk9gZggvvdM6a0xOfHuzR5hUi6WZotbS1p251OAEp3ErV+ZT1XnXZVj/bKUKWjQF+IWaZPP/00IsJFF10Ua3v22WdRVSZNmuT665WrjAFdVd8FfgpsJRzI24C1wG5V7Yjc7W/AsckeLyLTRGSNiKyJ5siMCYJcKlaSjeohXEI4+L7BTF402dEEoHQnUVvaWmhobujx/N8641vsbN+Z8nFO+p+tuXPnIiJ89atfjbWtXr0aVWXs2LGuvY4Jc5JyGQB8BTgBOAboA1zi9AVUda6qjlbV0UOGDMm5o8Z4Id2IOZeKlWjuvDtFk57ITJUCqRtRl3K0XSEVPdI6irJ089KMwdqtGbWzZs1CRPinf/qnWNsbb7yBqnL22Wfn/fwmOScpl4uBv6jqh6p6EFgEfB7oH0nBABwHvFugPhrjiUyVJLnMSK3tV5t1P1KlQB689MGkO5RUO42tbVuT7oQEifUtnxm1XV1d3HTTTYgIP/jBDwDo378/f/3rX1FVTj311Jye1zjnJKBvBc4Vkd4SPvU8BngdeB6IHkdNAZYUpovGeMPpRKJoTn3WmFnUr6xPm/9OVSmTTqpRdaodSqqdRk2/mqSPmXflPPRO7VEz79TBgweZNGkSFRUV/OxnPwPg5JNPZseOHezatYvjjjsu6+c0ucm4fK6qrhaRhcA6oANYD8wF/hv4LxH5YaTtsUJ21Jhiy2ZpWqfL5kZ/rl9Zz9a2rdT0q2Hvgb0p68YzpUDqRtQlDcLdlx8QhHHDx6V9TLb27dvHZZddxvPPPx9r++IXv8iyZcvo06dP3s9vsueoykVV71TVU1X1dFW9RlX3q+o7qnq2qp6kql9T1f2F7qwpvCAts5qvbE56ZlOO2H0mabLUCcCg6kE91olx8tlEJy9FUykQzqHnUu+ezK5duzjttNPo06dPLJhPnDiR/fv3s2rVKgvmHrJpWCbGT8usFmPHk81Jz3wuNJEsDdJ4ZSM7vrujx6JfTj+bpZuX9lgELN8a8/fee48hQ4YwcOBAXn/9dQBuuOEGOjs7WbhwoU3PLwG22qKJGTZnWNI66dp+tWyZsaX4HUrByaqHbr5WfHpk1phZSV+j0O9dts/v5qqOf/7znznllFMS2q789pUs/PeFNqOzSGy1RZM1v1zOrJjX0Uy30FY8NxbdSnfUke6zSfY4N1Z1XLt2LSKSGMzHATPhmeOe4YlNTzh+LlMcFtBNjJeXbstGKe54cr2oRjQYy13CNYuuSZlSSfUZDKwemDQVM274uKx2MPE7hSNvPBIRYfTouAHhV4GZQKSE3C8XyCg3FtBNjJeXbstGqe54uo/mgbR5/vi8OJA2553qs4ner/vjlm5e6ngHE+vHiy3oTGX7w9tjty1fvhyZKXB6z+0t5A7UTs7nJmPZoikfyUrqUuWMvTRrzKykOfRS2vE4KWNMljrqLho0U3021yy6JuXjnJYn3nT3Texb2
K0f10HtZ2q5+OKLqdlUkzR/X6gdqNMSUNOTjdBNAqc5Yy8V8pqhbnGS53cywo0Pmsk+m0xHK6lGuqrKPffcg4iwc2Hc+i7/l3Bq5dhD/ct05Ob2aLqY50iCxqpcjCmAVFUmEK5M2dq2lZCEUk7TB2eVO+kqfqDnBKPqimq+8PoXWP7k8kN97R2i6/ouOKJnP6Opo1TVPoWoOCrmdVf9wmmViwV0YwogVZmhICkDffzttf1qHae7UgXbhD50Ak8TXrQj4tRTT+UPf/gDy95dlnNQLkS5pl/KZ4vJaUC3HLoJDKc148Uwbvg4fr7m5wnBO1Mwr5AKpp05jYfHP5zVa6XKlW9t2woHgCYgPj7Wwt7X9sZmdNYNzP3cSSEqjvxwjqRUWUA3gVBKJ9Kil5nrHrzTBXM4dDm6z9d8Pu8+79y5k4qHK+jY3nGo8TPARKgdWNtjen6u67vU9HP/hKlfTs6XIku5mEAopcP0VH1xKp8+v/vuu3z2s59l5864E51nE76CQcj9GbXFnLVbzmymqCkrpTTZKN/XzOXxb775JiLCcccdFwvmd999N/Oa51F7dS0Syq0aKFMFix8qjsqJBXRT8pyUxRVyslG2ZXn5vmZIQo5L/9asWYOIJFw84pFHHkFV+fSET/OD535AS1sLIQnR0tZC/cp6x8/tdEEwP5S6lgsL6KakOQ0qhZrlmssKlLlcxCJep3ZmfI0VK1YgIpx11lmxtgULFqCqXH/99T1moUbLI7NZQdPqwf3HAropuHwmnjgNKoU69M8lqHXvS9+qvlm/bqrXeOqppxARvvzlL8faVqxYgaryta99LW2/nfY/qpTSWMYZq3IxBZVv9Uk2QcWtK/HElz+mqkzJFNTi+zJszjD2HtibdT+ir9G0sYmb7r4pcUYn8MorryQuoJVF/5zOUi3mlH+TPxuhm4LK97A9VfAISaggCzd1T7Fk269kch3RHn/E8Uy8YSKTPzs5IZh/6uZP0bihMWUwd9I/J/33y2Jt5hAL6Kag8j1sT5WP7tTOglxVycmCWdkGtVTBc1D1oNgFneMvF0cX9FrWi623bGXRI4siLwrcAsyET/p9knGHmC6P77T/VsHiP1aHbgrKjfrw+BRIqvVP3Ko3T7cGiyBZTXKJ9rulraXHLNH49Vai9wl1heh6qgveiHuSIcC1QHXPvmRa1+SG/76BuWvnJrxf2SwpYEqH1aH7TFDXf3bjsD2+LK5LkwcxN07UNW1sIiTJvxK1/WqzKstLttZ5dBQeHelCePGslg9b4BfQdfehYH7hhRfy8ccfU/vPtT2COYQvbJFprfWG5oaEYB593y2YB5cF9BLgp4szZ8vtw/ZC1ZtHP4Nko/9c8sbJUjfdR/63//p29j2wD34ERPdHp0HNT2t47rnn6N27d9IdYmWoko8OfJT278VKDsuTpVxKQClNWy91hZpqnuozqJAKGq5oyPq506Vu2A50X38rbnp+93RK90XH9h7YS2t7a4+njf97sSVog8VWW/QRq/d1rlALN6V6rzu1MzaqzeY1kpb8bQV+0e2OFwHn93xsvO7lmKG7kh9Yx2+DlRyWJ0u5lIBSvUZmqcp3qnmy8xXp3uuWtham/moqg+8b7PgcR0Kq5A3CVwGKD+ZnhtvkfEl4nJP0jpO/Fys5LE8W0EuAffkKKz6AD75vMNcuubZH/nnc8HFpp+sf7DpIa3ur43McdSPq+Mf2fwwH8vlxN1xIuO0feq6PLghTRk7JuINy8vdiJYflyXLoJaKULs4QJMly7slEy/miJYROpDrHcccdd3DPPfcktFVdUcWBkQdiv6e62IXT8yb291Je7BJ0xuB8bfL4k4W5PAZg6tSpPP744wn3+e1vf8v48eMTatIrpCLltUTtpKVJxurQjcH5ieWB1QNjPztdLbGmXw2qygUXXICIJATzl156CVVl/PjxQDgFEn3edBeGtvMmJh8W0E2gOQ2Qe/bvieXEu+efB1UPoqqiKuH+1RXVf
HTvR4RCIVatWhVrf/PNN1FVzjnnnB6vkWlZATtvYvJlZYsmsJo2Njle5fBg10GmL5ueMi8dS5nsaIFZ0B75L+r999/nqKOOSvsa6Y4WbEq+cYMFdBNIqU6GDqoelHRSDkBre2vstu7L/I47bhyTb57c4zFtbW0cccQRjvo0sHpgxglBxuTDUi4mkFKlN/pW9Y2tcJjJvoP7+N7T30NEGDjwUI69qqqK/fv3o6qOg3nTxiY+OvBRj/bKUKWlWYxrLKCbkuPGQmXpZt86Oum5HZgJ7971bqzp1FNPpbOzk/3791NVVZXyocnUr6znQOeBHu1HHHaEpVmMayygm5Li1kJl6WZTJpt0M6h6UPgOLYQn/sSttXLJJZegqrzxxhuEQrl9ZVLtYHa270zabkwuLKCXAT8tzevWKoGpRuF7D+ylaWNTj+UDJldODgfyXx66b6+ze9G4oZFly5blsCWJ3FrewU+fpSk+C+gB57eled1aqCw6Co+NvCNa21sTtv/hhx9GRHjwlgcP3ekiqJ1dy+P/+bhr6RA3lnfw22dpis8CesC5vS52oUeIbi5UVjeijr5VfXu07zu4jxtuvQER4cYbb4y1//KXv0RV0ZWa06JfmfqS79oqtsa5/xT7iCpjQBeRU0Tk1bj/94jIDBEZKCLLRWRz5N8BBe2pyYmbS/MWY4To9kJlPbZzETAT9izfE2taunQpjRsambl7ZkG/eE5WiUwXAGyZZX/x4ogqY0BX1TdVdZSqjiK86Oc+YDFwO7BSVYcDKyO/mxLj5oi3GCPEglzhSIHHCOfINxy6bfXq1agqO4/b6doXL58RWaYAYMss+4sXR1TZplzGAG+ragvwFaAh0t4ATHCzY8YdyUa8gjBu+Lisn6tYI8R81zuP6uzspO1HbXAX8NdD7Z+6+VM0bmjk7LPPBtz74uU7IsvUD1tm2V+8OKLKNqBPAp6M/Hykqr4f+fkD4EjXemVcUzeijikjp8QuUAzha1s2NDe4WgoIpVOB0d7ejojQq1cvdn+4+9ANt4ZPdk4dM5X6lfWxfqZaWTHbL16+O4ZMAcDWOPcXL46oHC+fKyJVwHvAaaq6TUR2q2r/uNt3qWqPPLqITAOmAdTU1JzZ0uJsrWnjHreuWZruep5AQa71mY2dO3cyaNCgHu179uzh8MMPB5JvQ6q1ySukgi7tcrzeeL7X8bRrywaLm9e/LcTyuZcC61R1W+T3bSJydOTFjiY8t64HVZ2rqqNVdfSQIUOyeDnjFrdLAZONEL2swNi6dSsikhDMq6urY9Pzo8Ecko+iFU04gonq1M6sUif5jsgspRIsXhxRZTNC/y/gWVX9ZeT3+4FWVb1XRG4HBqrqd9M9h13gwhvFGPl5cZX5TZs2MWLEiIS2z3zmM2zcuDHljM5U/YTw+7G1bSshCSVdszzT++XGiMyuRGSScXWELiJ9gC8TLvqKuhf4sohsBi6O/G5KUDFGfsXMF65atQoRSQjm48ePR1V57bXX0k7PT9WfaLDuurOLLk2+A8p0ROPGiMytE8KmPDkK6Kr6saoOUtW2uLZWVR2jqsNV9WJV9e2iFKVyMq9QinHoV4ydxtNPP42IcMEFF8TabrzxRlSV3/72t671M5+dkwVk4ylVLdr/Z555phZT44ZGrZ1dqzJTtHZ2rTZuaEx6n96zeiszif3fe1bvpPc16Tl5v3Pxs5/9TAlXk8f+//GPf1ywftrfhCk1wBp1EGMDe5Fop/lMqywoXd///ve5997ETF5DQwPf+MY3Cv7alss+xN4L75X9RaKdVl0EbTp1ENJHdXV1iEhCMH/mmWdQVdeDear3y2nqJAjvdzq2IJi/BPYSdE4DdU2/mqQjdD9Op+5+VNL9MmqlTFX5+7//e1566aWE9ldeeYXRozMOTHKS7/vl5/fbqXQDo6BsY5AEdoTu9MRWkGp//bgaX0dHB0cddRShUCghmG/evBlVLVgwh/zfLz++39kK2hFs0AU2oDsN1KU2n
TqfQ3g/ffn27duHiFBZWcm2bdti7R988AGqykknnVTwPuT7fjl5vN9TMrYgmL8ENuUSDchOTubUjagricPHfA/h/ZA+am1tZfDgwT3a46fnF0u+71emxwchJTNrzKykxQV+PIItB4EdoYP/aoLzPYQv5fRRS0sLIpIQzPv06cOBAwd6TM8vlnzfr0yPD0JKptSOYE16gR2h+1G+KYBsjkqKZcOGDYwcOTKhbcSIETQ3NyPSc/2UYsr3/cr0eD+lwNIplSNYk1lg69D9KEg18S+88AIXXnhhQtvll1/OkiVLPOpR8QXp8zTeKvs69GyVwsmrUk6ZOPXUU08hIgnB/Dvf+Q6qWlbBHILxeRp/sYBO6Uye8HO+8qGHHkJEuOqqq2Jt9913H6rKQw895GHPvOPnz9P4k6VcKPyhcZCnTn/ve9/jvvvuS2ibN28ekydP9qhHxgSPpVyyUMiTV4Ua/XudIpo0aRIikhDMn332WVTVgrkxHrGATmEnTxSidM2rFJGqcs455yAizJ8/P9a+Zs0aVJWxY8cW9PWNMelZQKewJ68KMfovdn1zR0cHQ4YMIRQK8fLLL8fa33rrLVSVP1X9yfMTysYYC+hAYU9eFWL0X6z65vjp+Tt27Ii1b9u2DVXlxBNPLIkTyl6nn3Lhxz6b0mcBPaL7rFLAlS9cIUb/hV5fo7W1FRGhT58+Ce0fffQRqsrQoUNjbV7PhiyFHUq2/Nhn4w8W0JNw8wtXiNF/oVJEW7Zs6TE9/4gjjohNz+/bt2+Px3g9G9LrHUou/Nhn4w9WtphEqc7wiy9/HFg9EICd7Tup6VfDuOHjWLp5aU6lkc3NzYwaNSqhbdSoUaxbty7j9Hyv36vQXSGUnn/DgtB1Z/KLPXvNj3023rKyxTx4PepMpvtRQ2t7K+0d7cy7ch6zxsyiobkh6yOK5557DhFJCOYTJkxAVVm/fr2jtVa8ng3px+Vd/dhn4w8W0JMoxS9cusP0bA/h58+fj4gwZsyYWNuMGTNQVRYvXpxVv7yeDen1DiUXfuyz8QcL6EkU+wvnpOIh3VGD0yOKOXPmICJMmjQp1vbTn/4UVWX27Nk599/LZYq93qHkwo99Nv5gOfQUijVdv/tFECC88+j+BU+XqwbS5rFvu+02/vVf/zXxdZua+PrXv+7WZhgXBXmpCJMbpzl0C+gec3pSMV3gB5Ledvr/nM7Lvzs0EQhg+fLlXHzxxS5vhXGL0x28KS9OA7pd4MJjTtMlTi7GUL+ynpbdLVQ9VsW+v+3jZQ4F83Xr1vG5z32uAFtg3JTufIgFdJOJjdA95lbZX0dHB0OHDmXXrl0J7e+88w4nnHBCvt00RWIljSYZK1v0iXxPwH788cex6fnxwXz79u2oqgVznynFCivjHxbQXZLr2hy5Vjzs2LEDEekxe3Pv3r2oKkOGDMl5W4x3stnB23owpjtLubigmCey/vKXv/DpT386oW3AgAFs27aNysrKtH20ygl/cPJZ2cnT8mJVLkVUjOnv69ev54wzzkhoO+OMM1izZk3GGZ35fPltR1CavF5ywRSX5dCLqJBLBaxcuRIRSQjmEydORFVZu3ato+n5uS4G5faqgJYicE8pLk9hvGcB3QWFOJH15JNPIiIJNeO33HILqsrChQuzeq5cv/xurgpoS8a6q1xOntogIDsW0F3g5lIBDzzwACKSMIvzgQceQFV7zPZ0Ktcvv5ujQFsy1l3lsB6MDQKyZwHdBW6szXHLLbcgItx6662xtieffBJV5eabb86rf7l++Z3sCJyOoCxF4K5yWA/GBgHZs5OiBeTkhOLEiRNZtGhRQtuKFSsSVkIsVl+SPSbdydRsTrbaSTyTLZtkdYhVuXgsXbD7+ulf54wzzuDVV19NeMz69et7XGjCa+l2BNkEaSuzM9myQcAhFtA9lvSPsRPkfkE/SXzP85me72VZYbYjKCuBNNmwQcAhri7OJSL9gf8ETgcUuBZ4E5gPDAO2AFep6q4UT
+EbbgWdhNzwfuDH4R/jA+CHH36YcP3OXPoa/wcfPWkEFOUPvqZfTdIRVKrce92IurL7IprcOVmQziRyNEIXkQbg96r6nyJSBfQG/hnYqar3isjtwABV/V665yn0CD3fYOzmiGDYnGG0vNcC9/e8be/evfTp0yer50v5Gh4ektoIypjicG1ikYj0A84HHgNQ1QOquhv4CtAQuVsDMCH37ubPjRInt86qv/POO7Tc3C2Y94bqu6tp3NDoSjBv2tiUNJhD8SpHyqHSwhg/yThCF5FRwFzgdWAksBaYDryrqv0j9xFgV/T3VAo5QndjtJrvWfVk66xU1VRxYOoBavvXuna4mGxkHK8cTxoZE2Ru5tB7AWcA31HV1SLyIHB7/B1UVUUk6Z5BRKYB0wBqago3i82NOudsc8JRGzZsYOTIkQltvUf2Zu68woxWkx1JxF43YJNLjDHOOZlY9Dfgb6q6OvL7QsIBfpuIHA0Q+Xd7sger6lxVHa2qowu5pKsbU6GznYDz+9//HhFJDOZXADNh3xX7mLxoMoPvG+z6zLZ0O6kpI6dQv7Le0VRpm1ZtTLBkDOiq+gHwVxE5JdI0hnD65dfAlEjbFGBJQXrokBtToZ3mhH/zm98gIpx//vmxtqHXDYWZhJNScVrbW12frpxqJzWoehANzQ2OziPYtGpjgsdplcsowmWLVcA7wFTCO4MFQA3QQrhscWe65yn1KpdMHn/8caZOnZrQ9uKLL3LeeeelzL9HuZnXTlVdUt2rmtb2Vkev7XWFTCFZvbsJGlfr0FX1VSDZk7k7Pz1Phapzvv/++/nud7+b0LZp0yZOO+202O+p8u9R8WmSfANOqvrcaxZdk/G107Wla/cLr2vzjfGSLc6Vgqpy2223ISKxYN6nTx+2bNmCqiYEc0ie8okXTZO4leqoG1HHlhlb6Lqziy0ztlA3oi6r8whBXX7VFnQy5cwCejcdHR184xvfIBQKxZarPeGEE9i+fTt79+6ltrY2djJR7hJ63d0LuUuoX1nPlJFTGFQ9qMdzxufyCxlwsjmPENTlV4N65GGMExbQIz755BMuueQSKisrmTdvHgDnnHMOe/bs4Z133olddDl+hA3QqZ1A+NC+obmBBy99kMYrG1OeWC1kwMlmok9QJwUF9cjDGCfKfnGutrY2vvSlLyWsfHj55ZezYMG98BpcAAAKuklEQVQCDjvssB73T3UyMSrTScUgn4wsBbYcgQmiwFxTtFC10h988AHHHHMM/fv3jwXz6667jo6ODpYsWZI0mEPmkXSm21Pl2vce2Ov7ksFSqGsP6pGHMU44qnLxSiEqFt5++21OOeUUOjs7Y2319fXcc889ji64nKmaJdOhfbTf05dNTygxjNarx9/HqVIo0yul6hJb1dGUq5Ieobt5ArG5uRkR4aSTTooF8zlz5qCq/PCHP3QUzCF9NYvTk4p1I+roW9W3R3su21YqE4SsusQY75V0QHfjBOKqVasQkYQrATU2NqKqTJ8+Pes+xR/SA1RIBUDWh/bZbFu6VEapBFKrLjHGeyWdcsl1sSyAJUuWMGFC4oq+S5cu5dJLL827X24c0jvdtkypDK8DaTTdk2qWrFWXGFM8JT1Cz6VW+he/+AUikhDM//jHP6KqrgRztzjdtkwjcC/L9LqXcHYXhLp2Y/ykpAN6NhULP/nJTxARvvnNb8baXnvtNVSVc889t5jddsTptmUagXs5QSjdMr5WXWJM8fm6Dl1VufXWW5k9e3as7fDDD2fTpk0FXXu9mNLVvdf2q02YgVrsKpd8LwhijHEmMHXoyXR0dDB58mRCoVAsmJ944ol8+OGH7NmzJzDBHNJX1cTn07uv61IMNivTmNLiq4De3t7O2LFjqayspKkpXOlx3nnn8dFHH/HWW28xePBgj3vovu5VNd1F8+leTOoJ6nowxviVL1IuHR0djB49mubm5ljbhAkTmD9/PlVVVW52saSlW3O9d2VvT6a7l8KkJmOCzmnKx
RcBfdGiRUycOBGAadOm8cgjjxAK+ergwhWp8ukVUhFbJCyerQ9jTDAEKoc+fvx4Xn75Zbq6unj00UfLMphD6hRHsmAONqnHmHLji8h42GGHcdZZZzmenh9UqUodU+XX7eSkMeWlpGeKmp5SzVJNtmSsnZw0prz4YoRu0rMlY40x4JOTokFnlSLGmHScnhS1lIvHSmkdcWOMv1nKpQj8sPytMcb/bIReYKW+/K0xJjhshF5guS5/O7B6oOfX5zTG+IsF9ALLZfnbqooq9uzf4/ll5Ywx/lKWAb2YC1llWpEwWcnh4VWHc7DrYML9La9ujMmk7AJ6sS+q7GRFwroRdcwaM4uafjVsbdtKa3tr0ueyvLoxJp2yC+jFripxMumn+04mFZvKb4xJp+yqXLyoKsl0Uel0l3KLsqn8xphMym6EXopX2cm0M7Gp/MYYJ8ouoJfiVXbS7UwEKepl5Ywx/lV2Ab0UF7KaNWYWQvKlgS1vboxxquxy6JA5p11sdSPq+MPWP/DzNT9POCnq9ZGDMcZfym6EHs+LCyun8vD4h5l35bySOnIwxvhL2S6f232NFSjehZWNMSYbgbqmaCHYKofGmKAp24BuqxwaY4LGUUAXkS0islFEXhWRNZG2gSKyXEQ2R/4dUNiuuqsU69GNMSYf2YzQL1TVUXF5nNuBlao6HFgZ+d03SrEe3Rhj8pFPyuUrQEPk5wZgQv7dKZ5SrEc3xph8OKpyEZG/ALsABR5V1bkisltV+0duF2BX9PdUSqnKxRhj/MLti0R/QVXfFZGhwHIR+VP8jaqqIpJ0zyAi04BpADU1lp82xphCcZRyUdV3I/9uBxYDZwPbRORogMi/21M8dq6qjlbV0UOGDHGn18YYY3rIGNBFpI+IHB79GRgLbAJ+DUyJ3G0KsKRQnTTGGJOZk5TLkcDicJqcXsATqvqMiLwCLBCRbwItwFWF66YxxphMMgZ0VX0HGJmkvRUYU4hOGWOMyV7ZzhQ1xpigsYBujDEBYQHdGGMCwgK6McYEhAV0Y4wJCAvoJaSUrqBkjPGfsrymaCnqfgWllrYWpv1mGoAtGGaMccRG6CXCrqBkjMmXBfQSYVdQMsbkywJ6ibArKBlj8mUBvUTYFZSMMfmygF4i7ApKxph8ObpikVvsikXGGJM9p1csshG6McYEhAV0Y4wJCAvoBrBZqsYEgc0UNTZL1ZiAsBG6sVmqxgSEBXSPlUKqw2apGhMMFtA9FE11tLS1oGgs1VHsoG6zVI0JBgvoHiqVVIfNUjUmGCyge6hUUh02S9WYYLAqFw/V9Kuhpa0laXux1Y2oswBujM/ZCN1DluowxrjJArqHLNVhjHGTLc5ljDElzhbnMsaYMmMB3RhjAsICujHGBIQFdGOMCQgL6MYYExBFrXIRkQ+BnjNpcjMY2OHSc5WaoG5bULcLgrttQd0u8Ne21arqkEx3KmpAd5OIrHFSxuNHQd22oG4XBHfbgrpdEMxts5SLMcYEhAV0Y4wJCD8H9Lled6CAgrptQd0uCO62BXW7IIDb5tscujHGmER+HqEbY4yJU/IBXUSOF5HnReR1EXlNRKZH2geKyHIR2Rz5d4DXfc1Wmm27X0T+JCIbRGSxiPT3uq/ZSrVtcbffKiIqIoO96mMu0m2XiHwn8rm9JiL3ednPXKT5exwlIi+JyKsiskZEzva6r9kQkU+JyMsi0hzZrrsi7SeIyGoReUtE5otIldd9zZuqlvT/wNHAGZGfDwf+DHwGuA+4PdJ+O/ATr/vq4raNBXpF2n8SpG2L/H488CzhOQmDve6rS5/ZhcAK4LDIbUO97quL2/Y74NJI+zjgBa/7muV2CdA38nMlsBo4F1gATIq0/xz4ttd9zff/kh+hq+r7qrou8vNHwBvAscBXgIbI3RqACd70MHeptk1Vf6eqHZG7vQQc51Ufc5XmcwOYDXwX8N0JnDTb9W3gXlXdH7ltu3e9zE2abVPgiMjd+gHvedPD3GjY3sivl
ZH/FbgIWBhp92UM6a7kA3o8ERkGfI7wHvZIVX0/ctMHwJEedcsV3bYt3rXAsmL3x03x2yYiXwHeVdVmTzvlgm6f2cnAFyOH8P8jImd52bd8ddu2GcD9IvJX4KfA973rWW5EpEJEXgW2A8uBt4HdcQOnv3FowOFbvgnoItIXeBqYoap74m/T8DGT70Z7Uam2TUTqgQ6gyau+5St+2whvyz8Dd3jaKRck+cx6AQMJH8r/P2CBiIiHXcxZkm37NnCzqh4P3Aw85mX/cqGqnao6ivDR7tnAqR53qSB8EdBFpJLwH1iTqi6KNG8TkaMjtx9NeM/rOym2DRH5P8BlQF1kh+U7SbbtROAEoFlEthD+cq0TkaO862X2UnxmfwMWRQ7vXwa6CK8V4isptm0KEP35KcIB0ZdUdTfwPHAe0F9EekVuOg5417OOuaTkA3pklPMY8IaqPhB3068J/6ER+XdJsfuWr1TbJiKXEM4xX66q+7zqXz6SbZuqblTVoao6TFWHEQ6CZ6jqBx52NStp/h5/RfjEKCJyMlCFfxZ+AtJu23vABZGfLwI2F7tv+RCRIdFKMRGpBr5M+PzA88BXI3fzZQzpruQnFonIF4DfAxsJj3ogfNi+mvBZ6hrC1RJXqepOTzqZozTb9hBwGNAaaXtJVa8vfg9zl2rbVHVp3H22AKNV1TeBL81ntgL4BTAKOADcpqrPedLJHKXZtj3Ag4TTSp8AN6jqWk86mQMR+Szhk54VhAexC1T1bhH5NPBfhFNl64HJ0ZPaflXyAd0YY4wzJZ9yMcYY44wFdGOMCQgL6MYYExAW0I0xJiAsoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLi/wM6kA1aREXRBwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from matplotlib import pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "plt.scatter(train_X['male_BMI'], train_y,color='g')\n", + "plt.plot(train_X['male_BMI'], model2.predict(train_X['male_BMI'].reshape(-1, 1)),color='k')\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "celltoolbar": "Slideshow", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}