1
0
Fork 0
Python2018/labs07/sklearn.ipynb

486 lines
25 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# Analiza danych w Pythonie: sklearn\n",
"\n",
"### Tomasz Dwojak\n",
"\n",
"### 3 czerwca 2018"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
" * Pierwsza część: pandas\n",
" * Druga część: sklearn"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Przypomnienie z UMZ\n",
" * przygotowanie i czyszczenie danych\n",
" * wybór i trening modelu\n",
" * tuning\n",
" * ewaluacja"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"import sklearn\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"data = pd.read_csv(\"./gapminder.csv\", index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>female_BMI</th>\n",
" <th>male_BMI</th>\n",
" <th>gdp</th>\n",
" <th>population</th>\n",
" <th>under5mortality</th>\n",
" <th>life_expectancy</th>\n",
" <th>fertility</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Afghanistan</th>\n",
" <td>21.07402</td>\n",
" <td>20.62058</td>\n",
" <td>1311.0</td>\n",
" <td>26528741.0</td>\n",
" <td>110.4</td>\n",
" <td>52.8</td>\n",
" <td>6.20</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Albania</th>\n",
" <td>25.65726</td>\n",
" <td>26.44657</td>\n",
" <td>8644.0</td>\n",
" <td>2968026.0</td>\n",
" <td>17.9</td>\n",
" <td>76.8</td>\n",
" <td>1.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Algeria</th>\n",
" <td>26.36841</td>\n",
" <td>24.59620</td>\n",
" <td>12314.0</td>\n",
" <td>34811059.0</td>\n",
" <td>29.5</td>\n",
" <td>75.5</td>\n",
" <td>2.73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Angola</th>\n",
" <td>23.48431</td>\n",
" <td>22.25083</td>\n",
" <td>7103.0</td>\n",
" <td>19842251.0</td>\n",
" <td>192.0</td>\n",
" <td>56.7</td>\n",
" <td>6.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Antigua and Barbuda</th>\n",
" <td>27.50545</td>\n",
" <td>25.76602</td>\n",
" <td>25736.0</td>\n",
" <td>85350.0</td>\n",
" <td>10.9</td>\n",
" <td>75.5</td>\n",
" <td>2.16</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" female_BMI male_BMI gdp population \\\n",
"Afghanistan 21.07402 20.62058 1311.0 26528741.0 \n",
"Albania 25.65726 26.44657 8644.0 2968026.0 \n",
"Algeria 26.36841 24.59620 12314.0 34811059.0 \n",
"Angola 23.48431 22.25083 7103.0 19842251.0 \n",
"Antigua and Barbuda 27.50545 25.76602 25736.0 85350.0 \n",
"\n",
" under5mortality life_expectancy fertility \n",
"Afghanistan 110.4 52.8 6.20 \n",
"Albania 17.9 76.8 1.76 \n",
"Algeria 29.5 75.5 2.73 \n",
"Angola 192.0 56.7 6.43 \n",
"Antigua and Barbuda 10.9 75.5 2.16 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"y = data['life_expectancy']\n",
"X = data.drop('life_expectancy', axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"train_X, test_X, train_y, test_y = \\\n",
" train_test_split(X, y, test_size=0.2, random_state=123, shuffle=True)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LinearRegression\n",
"model = LinearRegression()\n",
"model.fit(X,y)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"array([67.56279809, 76.25840076, 50.21126326, 59.21303855, 72.06348723])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predicted = model.predict(test_X)\n",
"predicted[:5]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"RMSE: 3.5179543848147863\n"
]
}
],
"source": [
"from sklearn.metrics import mean_squared_error\n",
"rmse = np.sqrt(mean_squared_error(predicted, test_y))\n",
"print(\"RMSE:\", rmse)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0.795295000468209"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" r2 = model.score(test_X, test_y)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"#### API\n",
" * model\n",
" * `fit`\n",
" * `predict`"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"female_BMI: -1.18\n",
"male_BMI: 1.46\n",
"gdp: 5.11e-05\n",
"population: 7.21e-10\n",
"under5mortality: -0.159\n",
"fertility: 0.421\n"
]
}
],
"source": [
"for p in zip(train_X.columns, model.coef_):\n",
" print(\"{}: {:.3}\".format(p[0], p[1]))"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/lib/python3.6/site-packages/ipykernel_launcher.py:2: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n",
" \n"
]
},
{
"data": {
"text/plain": [
"LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model2 = LinearRegression()\n",
"model2.fit(train_X['male_BMI'].reshape(-1, 1), train_y)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0.5852413468462743"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model2.intercept_"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/lib/python3.6/site-packages/ipykernel_launcher.py:5: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n",
" \"\"\"\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xt8VPWZ+PHPMyGxAZQ73hOsom6VQhVv21arWF4KrkVpXWyw/LCVtfqr4OXX2k1XUUtrtStod7Wya2teJLogQmm7oAXUpVsrysUAai1qCa0XkABBJAJJnt8fc2EmmcuZmTNz5px53r58kXzn9j0zmed8z3Oe7/eIqmKMMcb/Ql53wBhjjDssoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLCAroxxgSEBXRjjAkIC+jGGBMQFtCNMSYgehXzxQYPHqzDhg0r5ksaY4zvrV27doeqDsl0v6IG9GHDhrFmzZpivqQxxvieiLQ4uZ+lXIwxJiAsoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLCAroxxgSEBXRjjAkIC+jGGFMgqsodd9zB8uXLi/J6RZ1YZIwx5aCjo4OpU6fS2NgIgIjQ1dVV8Ne1Eboxxrikvb2dsWPHUllZGQvm55xzDm1tbUV5fRuhG2NMntra2rjgggtobm6OtV1++eUsWLCAww47rGj9sBG6Mcbk6IMPPuCYY46hf//+sWB+3XXX0dHRwZIlS4oazMECujHGZO3tt9+mV69eHH300bz//vsA1NfX09XVxdy5c6moqPCkX5ZyMcYYh5qbmxk1alRC25w5c5g+fbpHPUpkAd0YYzJYtWoVF1xwQUJbY2MjdXV1HvUoOQvoxhiTwq9+9SuuuOKKhLalS5dy6aWXetSj9CyHbowx3Tz22GOISEIwf/HFF1HVkg3mYAHdGGNi7r33XkSEb33rW7G21157DVXlvPPO87BnzljKxRhT1lSVW2+9ldmzZ8faDj/8cDZt2kRNTY2HPcueBXRjTFnq6OhgypQpPPHEE7G2E088kZdeeonBgwd72LPcWUA3xpSV9vZ2Lr/8clasWBFrO++88/jd735H3759PexZ/iygG2PKwu7duzn//PPZuHFjrG3ChAnMnz+fqqoqD3vmHjspaowJtPfff5+jjjqKAQMGxIL5tGnT6OzsZPHixYEJ5mAB3RgTUG+99RahUIhjjjmGbdu2AfAv//IvdHV18eijjxIKBS/8BW+LjAGaNjYxbM4wQneFGDZnGE0bm7zuUsGU07Y6sX79ekSE4cOHo6oAPPTQQ6gqd999NyLicQ8Lx3LoJnCaNjYx7TfT2HdwHwAtbS1M+800AOpGlNZU7XyV07Zm8sILL3DhhRcmtD3xxBNcffXVHvWo+GyEbgKnfmV9LMBF7Tu4j/qV9UV5/WKOmL3e1lKwePFiRCQhmD/zzDOoalkFc7ARugmgrW1bs2p3U7FHzF5uq9f+4z/+g2nTpiW0/fGPf+Tcc8/1qEfesxG6CZyafsln96Vqd5OTEbObI3in2xqkPPuPfvQjRCQhmL/++uuoalkHc7CAbgJo1phZ9K7sndDWu7I3s8bMKvhrZxoxR0fwLW0tKBobwecaYJ1sq9uv6QVVZcaMGYgI9fXhnWO/fv3YunUrqsrf/d3fedzD0mAB3QRO3Yg65v7DXGr71SIItf1qmfsPc11JeWQa6WYaMbud83ayrdOXTfdtnr2jo4Orr76aUCjEgw8+CMDw4cPZsWMHu3fv5vjjj/e4h6VFomU9xTB69Ghds2ZN0V7PGDd1z49DeDQcH0Az3Sd0Vwil53dOELru7HK9v9OXTae1vTXp7YV4Tbe0t7dz2WWX8dxzz8XaPv/5z/PMM8/4fnp+LkRkraqOznQ/G6Eb45CT0XWmEXOx8vtNG5u4dsm1KYM5QEhCJZdT3717NyNGjKB3796xYH7FFVewf/9+/vd//7csg3k2LK
Ab45DTipK6EXVsmbGFrju72DJjS0L6w838frr0z/Rl0znQeSDt4zu1s2Ry6u+99x5Dhw5lwIABbNq0CYDrr7+ezs5OFi1aFKjp+YVkAd0Yh1KNorMZ6daNqGPKyClUSPiq8BVSwZSRU7LO72c60ZluZJ7MvoP7mLJ4StGD+ubNmxERjj32WD788EMA7rjjDrq6unjkkUcCOT2/kBy9WyJys4i8JiKbRORJEfmUiJwgIqtF5C0RmS8itgs1gZZsdA3ZjXSbNjbR0NxAp3bGHtvQ3JB1IE2V/pm+LPerz3dqZ9FG6uvWrUNEOPnkk2Nt//Zv/4aqctdddwV6en4hZQzoInIscBMwWlVPByqAScBPgNmqehKwC/hmITtqjNe658ejo+x4mapH3KpySZX+aW1vpWljE4OqB2X1fPn0JRvPP/88IsKZZ54Za3vyySdRVW688caCvW65cHo80wuoFpFeQG/gfeAiYGHk9gZggvvdM6a0xOfHuzR5hUi6WZotbS1p251OAEp3ErV+ZT1XnXZVj/bKUKWjQF+IWaZPP/00IsJFF10Ua3v22WdRVSZNmuT665WrjAFdVd8FfgpsJRzI24C1wG5V7Yjc7W/AsckeLyLTRGSNiKyJ5siMCYJcKlaSjeohXEI4+L7BTF402dEEoHQnUVvaWmhobujx/N8641vsbN+Z8nFO+p+tuXPnIiJ89atfjbWtXr0aVWXs2LGuvY4Jc5JyGQB8BTgBOAboA1zi9AVUda6qjlbV0UOGDMm5o8Z4Id2IOZeKlWjuvDtFk57ITJUCqRtRl3K0XSEVPdI6irJ089KMwdqtGbWzZs1CRPinf/qnWNsbb7yBqnL22Wfn/fwmOScpl4uBv6jqh6p6EFgEfB7oH0nBABwHvFugPhrjiUyVJLnMSK3tV5t1P1KlQB689MGkO5RUO42tbVuT7oQEifUtnxm1XV1d3HTTTYgIP/jBDwDo378/f/3rX1FVTj311Jye1zjnJKBvBc4Vkd4SPvU8BngdeB6IHkdNAZYUpovGeMPpRKJoTn3WmFnUr6xPm/9OVSmTTqpRdaodSqqdRk2/mqSPmXflPPRO7VEz79TBgweZNGkSFRUV/OxnPwPg5JNPZseOHezatYvjjjsu6+c0ucm4fK6qrhaRhcA6oANYD8wF/hv4LxH5YaTtsUJ21Jhiy2ZpWqfL5kZ/rl9Zz9a2rdT0q2Hvgb0p68YzpUDqRtQlDcLdlx8QhHHDx6V9TLb27dvHZZddxvPPPx9r++IXv8iyZcvo06dP3s9vsueoykVV71TVU1X1dFW9RlX3q+o7qnq2qp6kql9T1f2F7qwpvCAts5qvbE56ZlOO2H0mabLUCcCg6kE91olx8tlEJy9FUykQzqHnUu+ezK5duzjttNPo06dPLJhPnDiR/fv3s2rVKgvmHrJpWCbGT8usFmPHk81Jz3wuNJEsDdJ4ZSM7vrujx6JfTj+bpZuX9lgELN8a8/fee48hQ4YwcOBAXn/9dQBuuOEGOjs7WbhwoU3PLwG22qKJGTZnWNI66dp+tWyZsaX4HUrByaqHbr5WfHpk1phZSV+j0O9dts/v5qqOf/7znznllFMS2q789pUs/PeFNqOzSGy1RZM1v1zOrJjX0Uy30FY8NxbdSnfUke6zSfY4N1Z1XLt2LSKSGMzHATPhmeOe4YlNTzh+LlMcFtBNjJeXbstGKe54cr2oRjQYy13CNYuuSZlSSfUZDKwemDQVM274uKx2MPE7hSNvPBIRYfTouAHhV4GZQKSE3C8XyCg3FtBNjJeXbstGqe54uo/mgbR5/vi8OJA2553qs4ner/vjlm5e6ngHE+vHiy3oTGX7w9tjty1fvhyZKXB6z+0t5A7UTs7nJmPZoikfyUrqUuWMvTRrzKykOfRS2vE4KWNMljrqLho0U3021yy6JuXjnJYn3nT3Texb2K0f10HtZ2q5+OKLqdlUkzR/X6gdqNMSUNOTjdBNAqc5Yy8V8pqhbn
GS53cywo0Pmsk+m0xHK6lGuqrKPffcg4iwc2Hc+i7/l3Bq5dhD/ct05Ob2aLqY50iCxqpcjCmAVFUmEK5M2dq2lZCEUk7TB2eVO+kqfqDnBKPqimq+8PoXWP7k8kN97R2i6/ouOKJnP6Opo1TVPoWoOCrmdVf9wmmViwV0YwogVZmhICkDffzttf1qHae7UgXbhD50Ak8TXrQj4tRTT+UPf/gDy95dlnNQLkS5pl/KZ4vJaUC3HLoJDKc148Uwbvg4fr7m5wnBO1Mwr5AKpp05jYfHP5zVa6XKlW9t2woHgCYgPj7Wwt7X9sZmdNYNzP3cSSEqjvxwjqRUWUA3gVBKJ9Kil5nrHrzTBXM4dDm6z9d8Pu8+79y5k4qHK+jY3nGo8TPARKgdWNtjen6u67vU9HP/hKlfTs6XIku5mEAopcP0VH1xKp8+v/vuu3z2s59l5864E51nE76CQcj9GbXFnLVbzmymqCkrpTTZKN/XzOXxb775JiLCcccdFwvmd999N/Oa51F7dS0Syq0aKFMFix8qjsqJBXRT8pyUxRVyslG2ZXn5vmZIQo5L/9asWYOIJFw84pFHHkFV+fSET/OD535AS1sLIQnR0tZC/cp6x8/tdEEwP5S6lgsL6KakOQ0qhZrlmssKlLlcxCJep3ZmfI0VK1YgIpx11lmxtgULFqCqXH/99T1moUbLI7NZQdPqwf3HAropuHwmnjgNKoU69M8lqHXvS9+qvlm/bqrXeOqppxARvvzlL8faVqxYgaryta99LW2/nfY/qpTSWMYZq3IxBZVv9Uk2QcWtK/HElz+mqkzJFNTi+zJszjD2HtibdT+ir9G0sYmb7r4pcUYn8MorryQuoJVF/5zOUi3mlH+TPxuhm4LK97A9VfAISaggCzd1T7Fk269kch3RHn/E8Uy8YSKTPzs5IZh/6uZP0bihMWUwd9I/J/33y2Jt5hAL6Kag8j1sT5WP7tTOglxVycmCWdkGtVTBc1D1oNgFneMvF0cX9FrWi623bGXRI4siLwrcAsyET/p9knGHmC6P77T/VsHiP1aHbgrKjfrw+BRIqvVP3Ko3T7cGiyBZTXKJ9rulraXHLNH49Vai9wl1heh6qgveiHuSIcC1QHXPvmRa1+SG/76BuWvnJrxf2SwpYEqH1aH7TFDXf3bjsD2+LK5LkwcxN07UNW1sIiTJvxK1/WqzKstLttZ5dBQeHelCePGslg9b4BfQdfehYH7hhRfy8ccfU/vPtT2COYQvbJFprfWG5oaEYB593y2YB5cF9BLgp4szZ8vtw/ZC1ZtHP4Nko/9c8sbJUjfdR/63//p29j2wD34ERPdHp0HNT2t47rnn6N27d9IdYmWoko8OfJT278VKDsuTpVxKQClNWy91hZpqnuozqJAKGq5oyPq506Vu2A50X38rbnp+93RK90XH9h7YS2t7a4+njf97sSVog8VWW/QRq/d1rlALN6V6rzu1MzaqzeY1kpb8bQV+0e2OFwHn93xsvO7lmKG7kh9Yx2+DlRyWJ0u5lIBSvUZmqcp3qnmy8xXp3uuWtham/moqg+8b7PgcR0Kq5A3CVwGKD+ZnhtvkfEl4nJP0jpO/Fys5LE8W0EuAffkKKz6AD75vMNcuubZH/nnc8HFpp+sf7DpIa3ur43McdSPq+Mf2fwwH8vlxN1xIuO0feq6PLghTRk7JuINy8vdiJYflyXLoJaKULs4QJMly7slEy/miJYROpDrHcccdd3DPPfcktFVdUcWBkQdiv6e62IXT8yb291Je7BJ0xuB8bfL4k4W5PAZg6tSpPP744wn3+e1vf8v48eMTatIrpCLltUTtpKVJxurQjcH5ieWB1QNjPztdLbGmXw2qygUXXICIJATzl156CVVl/PjxQDgFEn3edBeGtvMmJh8W0E2gOQ2Qe/bvieXEu+efB1UPoqqiKuH+1RXVfHTvR4RCIVatWhVrf/PNN1FVzjnnnB6vkWlZATtvYvJlZYsmsJo2Nj
le5fBg10GmL5ueMi8dS5nsaIFZ0B75L+r999/nqKOOSvsa6Y4WbEq+cYMFdBNIqU6GDqoelHRSDkBre2vstu7L/I47bhyTb57c4zFtbW0cccQRjvo0sHpgxglBxuTDUi4mkFKlN/pW9Y2tcJjJvoP7+N7T30NEGDjwUI69qqqK/fv3o6qOg3nTxiY+OvBRj/bKUKWlWYxrLKCbkuPGQmXpZt86Oum5HZgJ7971bqzp1FNPpbOzk/3791NVVZXyocnUr6znQOeBHu1HHHaEpVmMayygm5Li1kJl6WZTJpt0M6h6UPgOLYQn/sSttXLJJZegqrzxxhuEQrl9ZVLtYHa270zabkwuLKCXAT8tzevWKoGpRuF7D+ylaWNTj+UDJldODgfyXx66b6+ze9G4oZFly5blsCWJ3FrewU+fpSk+C+gB57eled1aqCw6Co+NvCNa21sTtv/hhx9GRHjwlgcP3ekiqJ1dy+P/+bhr6RA3lnfw22dpis8CesC5vS52oUeIbi5UVjeijr5VfXu07zu4jxtuvQER4cYbb4y1//KXv0RV0ZWa06JfmfqS79oqtsa5/xT7iCpjQBeRU0Tk1bj/94jIDBEZKCLLRWRz5N8BBe2pyYmbS/MWY4To9kJlPbZzETAT9izfE2taunQpjRsambl7ZkG/eE5WiUwXAGyZZX/x4ogqY0BX1TdVdZSqjiK86Oc+YDFwO7BSVYcDKyO/mxLj5oi3GCPEglzhSIHHCOfINxy6bfXq1agqO4/b6doXL58RWaYAYMss+4sXR1TZplzGAG+ragvwFaAh0t4ATHCzY8YdyUa8gjBu+Lisn6tYI8R81zuP6uzspO1HbXAX8NdD7Z+6+VM0bmjk7LPPBtz74uU7IsvUD1tm2V+8OKLKNqBPAp6M/Hykqr4f+fkD4EjXemVcUzeijikjp8QuUAzha1s2NDe4WgoIpVOB0d7ejojQq1cvdn+4+9ANt4ZPdk4dM5X6lfWxfqZaWTHbL16+O4ZMAcDWOPcXL46oHC+fKyJVwHvAaaq6TUR2q2r/uNt3qWqPPLqITAOmAdTU1JzZ0uJsrWnjHreuWZruep5AQa71mY2dO3cyaNCgHu179uzh8MMPB5JvQ6q1ySukgi7tcrzeeL7X8bRrywaLm9e/LcTyuZcC61R1W+T3bSJydOTFjiY8t64HVZ2rqqNVdfSQIUOyeDnjFrdLAZONEL2swNi6dSsikhDMq6urY9Pzo8Ecko+iFU04gonq1M6sUif5jsgspRIsXhxRZTNC/y/gWVX9ZeT3+4FWVb1XRG4HBqrqd9M9h13gwhvFGPl5cZX5TZs2MWLEiIS2z3zmM2zcuDHljM5U/YTw+7G1bSshCSVdszzT++XGiMyuRGSScXWELiJ9gC8TLvqKuhf4sohsBi6O/G5KUDFGfsXMF65atQoRSQjm48ePR1V57bXX0k7PT9WfaLDuurOLLk2+A8p0ROPGiMytE8KmPDkK6Kr6saoOUtW2uLZWVR2jqsNV9WJV9e2iFKVyMq9QinHoV4ydxtNPP42IcMEFF8TabrzxRlSV3/72t671M5+dkwVk4ylVLdr/Z555phZT44ZGrZ1dqzJTtHZ2rTZuaEx6n96zeiszif3fe1bvpPc16Tl5v3Pxs5/9TAlXk8f+//GPf1ywftrfhCk1wBp1EGMDe5Fop/lMqywoXd///ve5997ETF5DQwPf+MY3Cv7alss+xN4L75X9RaKdVl0EbTp1ENJHdXV1iEhCMH/mmWdQVdeDear3y2nqJAjvdzq2IJi/BPYSdE4DdU2/mqQjdD9Op+5+VNL9MmqlTFX5+7//e1566aWE9ldeeYXRozMOTHKS7/vl5/fbqXQDo6BsY5AEdoTu9MRWkGp//bgaX0dHB0cddRShUCghmG/evBlVLVgwh/zfLz++39kK2hFs0AU2oDsN1KU2nTqfQ3g/ffn27duHiFBZWcm2bdti7R988AGqykknnVTwPuT7fjl5vN
9TMrYgmL8ENuUSDchOTubUjagricPHfA/h/ZA+am1tZfDgwT3a46fnF0u+71emxwchJTNrzKykxQV+PIItB4EdoYP/aoLzPYQv5fRRS0sLIpIQzPv06cOBAwd6TM8vlnzfr0yPD0JKptSOYE16gR2h+1G+KYBsjkqKZcOGDYwcOTKhbcSIETQ3NyPSc/2UYsr3/cr0eD+lwNIplSNYk1lg69D9KEg18S+88AIXXnhhQtvll1/OkiVLPOpR8QXp8zTeKvs69GyVwsmrUk6ZOPXUU08hIgnB/Dvf+Q6qWlbBHILxeRp/sYBO6Uye8HO+8qGHHkJEuOqqq2Jt9913H6rKQw895GHPvOPnz9P4k6VcKPyhcZCnTn/ve9/jvvvuS2ibN28ekydP9qhHxgSPpVyyUMiTV4Ua/XudIpo0aRIikhDMn332WVTVgrkxHrGATmEnTxSidM2rFJGqcs455yAizJ8/P9a+Zs0aVJWxY8cW9PWNMelZQKewJ68KMfovdn1zR0cHQ4YMIRQK8fLLL8fa33rrLVSVP1X9yfMTysYYC+hAYU9eFWL0X6z65vjp+Tt27Ii1b9u2DVXlxBNPLIkTyl6nn3Lhxz6b0mcBPaL7rFLAlS9cIUb/hV5fo7W1FRGhT58+Ce0fffQRqsrQoUNjbV7PhiyFHUq2/Nhn4w8W0JNw8wtXiNF/oVJEW7Zs6TE9/4gjjohNz+/bt2+Px3g9G9LrHUou/Nhn4w9WtphEqc7wiy9/HFg9EICd7Tup6VfDuOHjWLp5aU6lkc3NzYwaNSqhbdSoUaxbty7j9Hyv36vQXSGUnn/DgtB1Z/KLPXvNj3023rKyxTx4PepMpvtRQ2t7K+0d7cy7ch6zxsyiobkh6yOK5557DhFJCOYTJkxAVVm/fr2jtVa8ng3px+Vd/dhn4w8W0JMoxS9cusP0bA/h58+fj4gwZsyYWNuMGTNQVRYvXpxVv7yeDen1DiUXfuyz8QcL6EkU+wvnpOIh3VGD0yOKOXPmICJMmjQp1vbTn/4UVWX27Nk599/LZYq93qHkwo99Nv5gOfQUijVdv/tFECC88+j+BU+XqwbS5rFvu+02/vVf/zXxdZua+PrXv+7WZhgXBXmpCJMbpzl0C+gec3pSMV3gB5Ledvr/nM7Lvzs0EQhg+fLlXHzxxS5vhXGL0x28KS9OA7pd4MJjTtMlTi7GUL+ynpbdLVQ9VsW+v+3jZQ4F83Xr1vG5z32uAFtg3JTufIgFdJOJjdA95lbZX0dHB0OHDmXXrl0J7e+88w4nnHBCvt00RWIljSYZK1v0iXxPwH788cex6fnxwXz79u2oqgVznynFCivjHxbQXZLr2hy5Vjzs2LEDEekxe3Pv3r2oKkOGDMl5W4x3stnB23owpjtLubigmCey/vKXv/DpT386oW3AgAFs27aNysrKtH20ygl/cPJZ2cnT8mJVLkVUjOnv69ev54wzzkhoO+OMM1izZk3GGZ35fPltR1CavF5ywRSX5dCLqJBLBaxcuRIRSQjmEydORFVZu3ato+n5uS4G5faqgJYicE8pLk9hvGcB3QWFOJH15JNPIiIJNeO33HILqsrChQuzeq5cv/xurgpoS8a6q1xOntogIDsW0F3g5lIBDzzwACKSMIvzgQceQFV7zPZ0Ktcvv5ujQFsy1l3lsB6MDQKyZwHdBW6szXHLLbcgItx6662xtieffBJV5eabb86rf7l++Z3sCJyOoCxF4K5yWA/GBgHZs5OiBeTkhOLEiRNZtGhRQtuKFSsSVkIsVl+SPSbdydRsTrbaSTyTLZtkdYhVuXgsXbD7+ulf54wzzuDVV19NeMz69et7XGjCa+l2BNkEaSuzM9myQcAhFtA9lvSPsRPkfkE/SXzP85me72VZYbYjKCuBNNmwQcAhri7OJSL9gf8ETgcUuBZ4E5gPDAO2AFep6q4UT+EbbgWdhNzwfuDH4R/jA+CHH36YcP3OXPoa/wcfPWkEFOUPvqZfTd
IRVKrce92IurL7IprcOVmQziRyNEIXkQbg96r6nyJSBfQG/hnYqar3isjtwABV/V665yn0CD3fYOzmiGDYnGG0vNcC9/e8be/evfTp0yer50v5Gh4ektoIypjicG1ikYj0A84HHgNQ1QOquhv4CtAQuVsDMCH37ubPjRInt86qv/POO7Tc3C2Y94bqu6tp3NDoSjBv2tiUNJhD8SpHyqHSwhg/yThCF5FRwFzgdWAksBaYDryrqv0j9xFgV/T3VAo5QndjtJrvWfVk66xU1VRxYOoBavvXuna4mGxkHK8cTxoZE2Ru5tB7AWcA31HV1SLyIHB7/B1UVUUk6Z5BRKYB0wBqago3i82NOudsc8JRGzZsYOTIkQltvUf2Zu68woxWkx1JxF43YJNLjDHOOZlY9Dfgb6q6OvL7QsIBfpuIHA0Q+Xd7sger6lxVHa2qowu5pKsbU6GznYDz+9//HhFJDOZXADNh3xX7mLxoMoPvG+z6zLZ0O6kpI6dQv7Le0VRpm1ZtTLBkDOiq+gHwVxE5JdI0hnD65dfAlEjbFGBJQXrokBtToZ3mhH/zm98gIpx//vmxtqHXDYWZhJNScVrbW12frpxqJzWoehANzQ2OziPYtGpjgsdplcsowmWLVcA7wFTCO4MFQA3QQrhscWe65yn1KpdMHn/8caZOnZrQ9uKLL3LeeeelzL9HuZnXTlVdUt2rmtb2Vkev7XWFTCFZvbsJGlfr0FX1VSDZk7k7Pz1Phapzvv/++/nud7+b0LZp0yZOO+202O+p8u9R8WmSfANOqvrcaxZdk/G107Wla/cLr2vzjfGSLc6Vgqpy2223ISKxYN6nTx+2bNmCqiYEc0ie8okXTZO4leqoG1HHlhlb6Lqziy0ztlA3oi6r8whBXX7VFnQy5cwCejcdHR184xvfIBQKxZarPeGEE9i+fTt79+6ltrY2djJR7hJ63d0LuUuoX1nPlJFTGFQ9qMdzxufyCxlwsjmPENTlV4N65GGMExbQIz755BMuueQSKisrmTdvHgDnnHMOe/bs4Z133olddDl+hA3QqZ1A+NC+obmBBy99kMYrG1OeWC1kwMlmok9QJwUF9cjDGCfKfnGutrY2vvSlLyWsfHj55ZezYMG98BpcAAAKuklEQVQCDjvssB73T3UyMSrTScUgn4wsBbYcgQmiwFxTtFC10h988AHHHHMM/fv3jwXz6667jo6ODpYsWZI0mEPmkXSm21Pl2vce2Ov7ksFSqGsP6pGHMU44qnLxSiEqFt5++21OOeUUOjs7Y2319fXcc889ji64nKmaJdOhfbTf05dNTygxjNarx9/HqVIo0yul6hJb1dGUq5Ieobt5ArG5uRkR4aSTTooF8zlz5qCq/PCHP3QUzCF9NYvTk4p1I+roW9W3R3su21YqE4SsusQY75V0QHfjBOKqVasQkYQrATU2NqKqTJ8+Pes+xR/SA1RIBUDWh/bZbFu6VEapBFKrLjHGeyWdcsl1sSyAJUuWMGFC4oq+S5cu5dJLL827X24c0jvdtkypDK8DaTTdk2qWrFWXGFM8JT1Cz6VW+he/+AUikhDM//jHP6KqrgRztzjdtkwjcC/L9LqXcHYXhLp2Y/ykpAN6NhULP/nJTxARvvnNb8baXnvtNVSVc889t5jddsTptmUagXs5QSjdMr5WXWJM8fm6Dl1VufXWW5k9e3as7fDDD2fTpk0FXXu9mNLVvdf2q02YgVrsKpd8LwhijHEmMHXoyXR0dDB58mRCoVAsmJ944ol8+OGH7NmzJzDBHNJX1cTn07uv61IMNivTmNLiq4De3t7O2LFjqayspKkpXOlx3nnn8dFHH/HWW28xePBgj3vovu5VNd1F8+leTOoJ6nowxviVL1IuHR0djB49mubm5ljbhAkTmD9/PlVVVW52saSlW3O9d2VvT6a7l8KkJmOCzmnKxRcBfdGiRUycOBGAadOm8cgjjxAK+ergwhWp8ukVUhFbJCyerQ9jTD
AEKoc+fvx4Xn75Zbq6unj00UfLMphD6hRHsmAONqnHmHLji8h42GGHcdZZZzmenh9UqUodU+XX7eSkMeWlpGeKmp5SzVJNtmSsnZw0prz4YoRu0rMlY40x4JOTokFnlSLGmHScnhS1lIvHSmkdcWOMv1nKpQj8sPytMcb/bIReYKW+/K0xJjhshF5guS5/O7B6oOfX5zTG+IsF9ALLZfnbqooq9uzf4/ll5Ywx/lKWAb2YC1llWpEwWcnh4VWHc7DrYML9La9ujMmk7AJ6sS+q7GRFwroRdcwaM4uafjVsbdtKa3tr0ueyvLoxJp2yC+jFripxMumn+04mFZvKb4xJp+yqXLyoKsl0Uel0l3KLsqn8xphMym6EXopX2cm0M7Gp/MYYJ8ouoJfiVXbS7UwEKepl5Ywx/lV2Ab0UF7KaNWYWQvKlgS1vboxxquxy6JA5p11sdSPq+MPWP/DzNT9POCnq9ZGDMcZfym6EHs+LCyun8vD4h5l35bySOnIwxvhL2S6f232NFSjehZWNMSYbgbqmaCHYKofGmKAp24BuqxwaY4LGUUAXkS0islFEXhWRNZG2gSKyXEQ2R/4dUNiuuqsU69GNMSYf2YzQL1TVUXF5nNuBlao6HFgZ+d03SrEe3Rhj8pFPyuUrQEPk5wZgQv7dKZ5SrEc3xph8OKpyEZG/ALsABR5V1bkisltV+0duF2BX9PdUSqnKxRhj/MLti0R/QVXfFZGhwHIR+VP8jaqqIpJ0zyAi04BpADU1lp82xphCcZRyUdV3I/9uBxYDZwPbRORogMi/21M8dq6qjlbV0UOGDHGn18YYY3rIGNBFpI+IHB79GRgLbAJ+DUyJ3G0KsKRQnTTGGJOZk5TLkcDicJqcXsATqvqMiLwCLBCRbwItwFWF66YxxphMMgZ0VX0HGJmkvRUYU4hOGWOMyV7ZzhQ1xpigsYBujDEBYQHdGGMCwgK6McYEhAV0Y4wJCAvoJaSUrqBkjPGfsrymaCnqfgWllrYWpv1mGoAtGGaMccRG6CXCrqBkjMmXBfQSYVdQMsbkywJ6ibArKBlj8mUBvUTYFZSMMfmygF4i7ApKxph8ObpikVvsikXGGJM9p1csshG6McYEhAV0Y4wJCAvoBrBZqsYEgc0UNTZL1ZiAsBG6sVmqxgSEBXSPlUKqw2apGhMMFtA9FE11tLS1oGgs1VHsoG6zVI0JBgvoHiqVVIfNUjUmGCyge6hUUh02S9WYYLAqFw/V9Kuhpa0laXux1Y2oswBujM/ZCN1DluowxrjJArqHLNVhjHGTLc5ljDElzhbnMsaYMmMB3RhjAsICujHGBIQFdGOMCQgL6MYYExBFrXIRkQ+BnjNpcjMY2OHSc5WaoG5bULcLgrttQd0u8Ne21arqkEx3KmpAd5OIrHFSxuNHQd22oG4XBHfbgrpdEMxts5SLMcYEhAV0Y4wJCD8H9Lled6CAgrptQd0uCO62BXW7IIDb5tscujHGmER+HqEbY4yJU/IBXUSOF5HnReR1EXlNRKZH2geKyHIR2Rz5d4DXfc1Wmm27X0T+JCIbRGSxiPT3uq/ZSrVtcbffKiIqIoO96mMu0m2XiHwn8rm9JiL3ednPXKT5exwlIi+JyKsiskZEzva6r9kQkU+JyMsi0hzZrrsi7SeIyGoReUtE5otIldd9zZuqlvT/wNHAGZGfDwf+DHwGuA+4PdJ+O/ATr/vq4raNBXpF2n8SpG2L/H488CzhOQmDve6rS5/ZhcAK4LDIbUO97quL2/Y74NJI+zjgBa/7muV2CdA38nMlsBo4F1gATIq0/xz4ttd9zff/kh+hq+r7qrou8vNHwBvAscBXgIbI3RqACd70MHeptk1Vf6eqHZG7vQQc51Ufc5XmcwOYDXwX8N0JnDTb9W3gXlXdH7ltu3e9zE2abVPgiMjd+gHvedPD3GjY3sivlZH/FbgIWBhp92UM6a7kA3o8ERkGfI7wHvZIVX0/ctMHwJEedcsV3b
Yt3rXAsmL3x03x2yYiXwHeVdVmTzvlgm6f2cnAFyOH8P8jImd52bd8ddu2GcD9IvJX4KfA973rWW5EpEJEXgW2A8uBt4HdcQOnv3FowOFbvgnoItIXeBqYoap74m/T8DGT70Z7Uam2TUTqgQ6gyau+5St+2whvyz8Dd3jaKRck+cx6AQMJH8r/P2CBiIiHXcxZkm37NnCzqh4P3Aw85mX/cqGqnao6ivDR7tnAqR53qSB8EdBFpJLwH1iTqi6KNG8TkaMjtx9NeM/rOym2DRH5P8BlQF1kh+U7SbbtROAEoFlEthD+cq0TkaO862X2UnxmfwMWRQ7vXwa6CK8V4isptm0KEP35KcIB0ZdUdTfwPHAe0F9EekVuOg5417OOuaTkA3pklPMY8IaqPhB3068J/6ER+XdJsfuWr1TbJiKXEM4xX66q+7zqXz6SbZuqblTVoao6TFWHEQ6CZ6jqBx52NStp/h5/RfjEKCJyMlCFfxZ+AtJu23vABZGfLwI2F7tv+RCRIdFKMRGpBr5M+PzA88BXI3fzZQzpruQnFonIF4DfAxsJj3ogfNi+mvBZ6hrC1RJXqepOTzqZozTb9hBwGNAaaXtJVa8vfg9zl2rbVHVp3H22AKNV1TeBL81ntgL4BTAKOADcpqrPedLJHKXZtj3Ag4TTSp8AN6jqWk86mQMR+Szhk54VhAexC1T1bhH5NPBfhFNl64HJ0ZPaflXyAd0YY4wzJZ9yMcYY44wFdGOMCQgL6MYYExAW0I0xJiAsoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLi/wM6kA1aREXRBwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fa465870198>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from matplotlib import pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"plt.scatter(train_X['male_BMI'], train_y,color='g')\n",
"plt.plot(train_X['male_BMI'], model2.predict(train_X['male_BMI'].reshape(-1, 1)),color='k')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"celltoolbar": "Slideshow",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}