{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Analiza danych w Pythonie: sklearn\n", "\n", "### Tomasz Dwojak\n", "\n", "### 3 czerwca 2018" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ " * Pierwsza część: pandas\n", " * Druga część: sklearn" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Przypomnienie z UMZ\n", " * przygotowanie i czyszczenie danych\n", " * wybór i trening modelu\n", " * tuning\n", " * ewaluacja" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "import sklearn\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "data = pd.read_csv(\"./gapminder.csv\", index_col=0)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
female_BMImale_BMIgdppopulationunder5mortalitylife_expectancyfertility
Afghanistan21.0740220.620581311.026528741.0110.452.86.20
Albania25.6572626.446578644.02968026.017.976.81.76
Algeria26.3684124.5962012314.034811059.029.575.52.73
Angola23.4843122.250837103.019842251.0192.056.76.43
Antigua and Barbuda27.5054525.7660225736.085350.010.975.52.16
\n", "
" ], "text/plain": [ " female_BMI male_BMI gdp population \\\n", "Afghanistan 21.07402 20.62058 1311.0 26528741.0 \n", "Albania 25.65726 26.44657 8644.0 2968026.0 \n", "Algeria 26.36841 24.59620 12314.0 34811059.0 \n", "Angola 23.48431 22.25083 7103.0 19842251.0 \n", "Antigua and Barbuda 27.50545 25.76602 25736.0 85350.0 \n", "\n", " under5mortality life_expectancy fertility \n", "Afghanistan 110.4 52.8 6.20 \n", "Albania 17.9 76.8 1.76 \n", "Algeria 29.5 75.5 2.73 \n", "Angola 192.0 56.7 6.43 \n", "Antigua and Barbuda 10.9 75.5 2.16 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "y = data['life_expectancy']\n", "X = data.drop('life_expectancy', axis=1)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split\n", "train_X, test_X, train_y, test_y = \\\n", " train_test_split(X, y, test_size=0.2, random_state=123, shuffle=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import LinearRegression\n", "model = LinearRegression()\n", "# Fit on the training split only: fitting on the full X, y would let the model see the test rows\n", "# and make the test-set RMSE / R^2 computed afterwards overly optimistic.\n", "model.fit(train_X, train_y)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "array([67.56279809, 76.25840076, 50.21126326, 59.21303855, 72.06348723])" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicted = model.predict(test_X)\n", "predicted[:5]" ] }, { "cell_type": "code", 
"execution_count": 18, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RMSE: 3.5179543848147863\n" ] } ], "source": [ "from sklearn.metrics import mean_squared_error\n", "rmse = np.sqrt(mean_squared_error(predicted, test_y))\n", "print(\"RMSE:\", rmse)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "0.795295000468209" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ " r2 = model.score(test_X, test_y)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "#### API\n", " * model\n", " * `fit`\n", " * `predict`" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "female_BMI: -1.18\n", "male_BMI: 1.46\n", "gdp: 5.11e-05\n", "population: 7.21e-10\n", "under5mortality: -0.159\n", "fertility: 0.421\n" ] } ], "source": [ "for p in zip(train_X.columns, model.coef_):\n", " print(\"{}: {:.3}\".format(p[0], p[1]))" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/lib/python3.6/site-packages/ipykernel_launcher.py:2: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) 
instead\n", " \n" ] }, { "data": { "text/plain": [ "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model2 = LinearRegression()\n", "model2.fit(train_X['male_BMI'].reshape(-1, 1), train_y)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "0.5852413468462743" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model2.intercept_" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/lib/python3.6/site-packages/ipykernel_launcher.py:5: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead\n", " \"\"\"\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xt8VPWZ+PHPMyGxAZQ73hOsom6VQhVv21arWF4KrkVpXWyw/LCVtfqr4OXX2k1XUUtrtStod7Wya2teJLogQmm7oAXUpVsrysUAai1qCa0XkABBJAJJnt8fc2EmmcuZmTNz5px53r58kXzn9j0zmed8z3Oe7/eIqmKMMcb/Ql53wBhjjDssoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLCAroxxgSEBXRjjAkIC+jGGBMQFtCNMSYgehXzxQYPHqzDhg0r5ksaY4zvrV27doeqDsl0v6IG9GHDhrFmzZpivqQxxvieiLQ4uZ+lXIwxJiAsoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLCAroxxgSEBXRjjAkIC+jGGFMgqsodd9zB8uXLi/J6RZ1YZIwx5aCjo4OpU6fS2NgIgIjQ1dVV8Ne1Eboxxrikvb2dsWPHUllZGQvm55xzDm1tbUV5fRuhG2NMntra2rjgggtobm6OtV1++eUsWLCAww47rGj9sBG6Mcbk6IMPPuCYY46hf//+sWB+3XXX0dHRwZIlS4oazMECujHGZO3tt9+mV69eHH300bz//vsA1NfX09XVxdy5c6moqPCkX5ZyMcYYh5qbmxk1alRC25w5c5g+fbpHPUpkAd0YYzJYtWoVF1xwQUJbY2MjdXV1HvUoOQvoxhiTwq9+9SuuuOKKhLalS5dy6aWXetSj9CyHbowx3Tz22GOISEIwf/HFF1HVkg3mYAHdGGNi7r33XkSEb33rW7G21157DVXlvPPO87BnzljKxRhT1lSVW2+9ldmzZ8faDj
/8cDZt2kRNTY2HPcueBXRjTFnq6OhgypQpPPHEE7G2E088kZdeeonBgwd72LPcWUA3xpSV9vZ2Lr/8clasWBFrO++88/jd735H3759PexZ/iygG2PKwu7duzn//PPZuHFjrG3ChAnMnz+fqqoqD3vmHjspaowJtPfff5+jjjqKAQMGxIL5tGnT6OzsZPHixYEJ5mAB3RgTUG+99RahUIhjjjmGbdu2AfAv//IvdHV18eijjxIKBS/8BW+LjAGaNjYxbM4wQneFGDZnGE0bm7zuUsGU07Y6sX79ekSE4cOHo6oAPPTQQ6gqd999NyLicQ8Lx3LoJnCaNjYx7TfT2HdwHwAtbS1M+800AOpGlNZU7XyV07Zm8sILL3DhhRcmtD3xxBNcffXVHvWo+GyEbgKnfmV9LMBF7Tu4j/qV9UV5/WKOmL3e1lKwePFiRCQhmD/zzDOoalkFc7ARugmgrW1bs2p3U7FHzF5uq9f+4z/+g2nTpiW0/fGPf+Tcc8/1qEfesxG6CZyafsln96Vqd5OTEbObI3in2xqkPPuPfvQjRCQhmL/++uuoalkHc7CAbgJo1phZ9K7sndDWu7I3s8bMKvhrZxoxR0fwLW0tKBobwecaYJ1sq9uv6QVVZcaMGYgI9fXhnWO/fv3YunUrqsrf/d3fedzD0mAB3QRO3Yg65v7DXGr71SIItf1qmfsPc11JeWQa6WYaMbud83ayrdOXTfdtnr2jo4Orr76aUCjEgw8+CMDw4cPZsWMHu3fv5vjjj/e4h6VFomU9xTB69Ghds2ZN0V7PGDd1z49DeDQcH0Az3Sd0Vwil53dOELru7HK9v9OXTae1vTXp7YV4Tbe0t7dz2WWX8dxzz8XaPv/5z/PMM8/4fnp+LkRkraqOznQ/G6Eb45CT0XWmEXOx8vtNG5u4dsm1KYM5QEhCJZdT3717NyNGjKB3796xYH7FFVewf/9+/vd//7csg3k2LKAb45DTipK6EXVsmbGFrju72DJjS0L6w838frr0z/Rl0znQeSDt4zu1s2Ry6u+99x5Dhw5lwIABbNq0CYDrr7+ezs5OFi1aFKjp+YVkAd0Yh1KNorMZ6daNqGPKyClUSPiq8BVSwZSRU7LO72c60ZluZJ7MvoP7mLJ4StGD+ubNmxERjj32WD788EMA7rjjDrq6unjkkUcCOT2/kBy9WyJys4i8JiKbRORJEfmUiJwgIqtF5C0RmS8itgs1gZZsdA3ZjXSbNjbR0NxAp3bGHtvQ3JB1IE2V/pm+LPerz3dqZ9FG6uvWrUNEOPnkk2Nt//Zv/4aqctdddwV6en4hZQzoInIscBMwWlVPByqAScBPgNmqehKwC/hmITtqjNe658ejo+x4mapH3KpySZX+aW1vpWljE4OqB2X1fPn0JRvPP/88IsKZZ54Za3vyySdRVW688caCvW65cHo80wuoFpFeQG/gfeAiYGHk9gZggvvdM6a0xOfHuzR5hUi6WZotbS1p251OAEp3ErV+ZT1XnXZVj/bKUKWjQF+IWaZPP/00IsJFF10Ua3v22WdRVSZNmuT665WrjAFdVd8FfgpsJRzI24C1wG5V7Yjc7W/AsckeLyLTRGSNiKyJ5siMCYJcKlaSjeohXEI4+L7BTF402dEEoHQnUVvaWmhobujx/N8641vsbN+Z8nFO+p+tuXPnIiJ89atfjbWtXr0aVWXs2LGuvY4Jc5JyGQB8BTgBOAboA1zi9AVUda6qjlbV0UOGDMm5o8Z4Id2IOZeKlWjuvDtFk57ITJUCqRtRl3K0XSEVPdI6irJ089KMwdqtGbWzZs1CRPinf/qnWNsbb7yBqnL22Wfn/fwmOScpl4uBv6jqh6p6EFgEfB7oH0nBABwHvFugPhrjiUyVJLnMSK3tV5t1P1KlQB689MGkO5RUO42tbVuT7oQEifUtnxm1XV1d3HTTTYgIP/jBDwDo378/f/3rX1FVTj311Jye1zjnJKBvBc4Vkd4SPvU8BngdeB
6IHkdNAZYUpovGeMPpRKJoTn3WmFnUr6xPm/9OVSmTTqpRdaodSqqdRk2/mqSPmXflPPRO7VEz79TBgweZNGkSFRUV/OxnPwPg5JNPZseOHezatYvjjjsu6+c0ucm4fK6qrhaRhcA6oANYD8wF/hv4LxH5YaTtsUJ21Jhiy2ZpWqfL5kZ/rl9Zz9a2rdT0q2Hvgb0p68YzpUDqRtQlDcLdlx8QhHHDx6V9TLb27dvHZZddxvPPPx9r++IXv8iyZcvo06dP3s9vsueoykVV71TVU1X1dFW9RlX3q+o7qnq2qp6kql9T1f2F7qwpvCAts5qvbE56ZlOO2H0mabLUCcCg6kE91olx8tlEJy9FUykQzqHnUu+ezK5duzjttNPo06dPLJhPnDiR/fv3s2rVKgvmHrJpWCbGT8usFmPHk81Jz3wuNJEsDdJ4ZSM7vrujx6JfTj+bpZuX9lgELN8a8/fee48hQ4YwcOBAXn/9dQBuuOEGOjs7WbhwoU3PLwG22qKJGTZnWNI66dp+tWyZsaX4HUrByaqHbr5WfHpk1phZSV+j0O9dts/v5qqOf/7znznllFMS2q789pUs/PeFNqOzSGy1RZM1v1zOrJjX0Uy30FY8NxbdSnfUke6zSfY4N1Z1XLt2LSKSGMzHATPhmeOe4YlNTzh+LlMcFtBNjJeXbstGKe54cr2oRjQYy13CNYuuSZlSSfUZDKwemDQVM274uKx2MPE7hSNvPBIRYfTouAHhV4GZQKSE3C8XyCg3FtBNjJeXbstGqe54uo/mgbR5/vi8OJA2553qs4ner/vjlm5e6ngHE+vHiy3oTGX7w9tjty1fvhyZKXB6z+0t5A7UTs7nJmPZoikfyUrqUuWMvTRrzKykOfRS2vE4KWNMljrqLho0U3021yy6JuXjnJYn3nT3Texb2K0f10HtZ2q5+OKLqdlUkzR/X6gdqNMSUNOTjdBNAqc5Yy8V8pqhbnGS53cywo0Pmsk+m0xHK6lGuqrKPffcg4iwc2Hc+i7/l3Bq5dhD/ct05Ob2aLqY50iCxqpcjCmAVFUmEK5M2dq2lZCEUk7TB2eVO+kqfqDnBKPqimq+8PoXWP7k8kN97R2i6/ouOKJnP6Opo1TVPoWoOCrmdVf9wmmViwV0YwogVZmhICkDffzttf1qHae7UgXbhD50Ak8TXrQj4tRTT+UPf/gDy95dlnNQLkS5pl/KZ4vJaUC3HLoJDKc148Uwbvg4fr7m5wnBO1Mwr5AKpp05jYfHP5zVa6XKlW9t2woHgCYgPj7Wwt7X9sZmdNYNzP3cSSEqjvxwjqRUWUA3gVBKJ9Kil5nrHrzTBXM4dDm6z9d8Pu8+79y5k4qHK+jY3nGo8TPARKgdWNtjen6u67vU9HP/hKlfTs6XIku5mEAopcP0VH1xKp8+v/vuu3z2s59l5864E51nE76CQcj9GbXFnLVbzmymqCkrpTTZKN/XzOXxb775JiLCcccdFwvmd999N/Oa51F7dS0Syq0aKFMFix8qjsqJBXRT8pyUxRVyslG2ZXn5vmZIQo5L/9asWYOIJFw84pFHHkFV+fSET/OD535AS1sLIQnR0tZC/cp6x8/tdEEwP5S6lgsL6KakOQ0qhZrlmssKlLlcxCJep3ZmfI0VK1YgIpx11lmxtgULFqCqXH/99T1moUbLI7NZQdPqwf3HAropuHwmnjgNKoU69M8lqHXvS9+qvlm/bqrXeOqppxARvvzlL8faVqxYgaryta99LW2/nfY/qpTSWMYZq3IxBZVv9Uk2QcWtK/HElz+mqkzJFNTi+zJszjD2HtibdT+ir9G0sYmb7r4pcUYn8MorryQuoJVF/5zOUi3mlH+TPxuhm4LK97A9VfAISaggCzd1T7Fk269kch3RHn/E8Uy8YSKTPzs5IZh/6uZP0bihMWUwd9I/J/33y2Jt5hAL6Kag8j1sT5WP7tTOglxVycmCWdkGtVTBc1D1oNgFneMvF0cX9FrWi623bG
XRI4siLwrcAsyET/p9knGHmC6P77T/VsHiP1aHbgrKjfrw+BRIqvVP3Ko3T7cGiyBZTXKJ9rulraXHLNH49Vai9wl1heh6qgveiHuSIcC1QHXPvmRa1+SG/76BuWvnJrxf2SwpYEqH1aH7TFDXf3bjsD2+LK5LkwcxN07UNW1sIiTJvxK1/WqzKstLttZ5dBQeHelCePGslg9b4BfQdfehYH7hhRfy8ccfU/vPtT2COYQvbJFprfWG5oaEYB593y2YB5cF9BLgp4szZ8vtw/ZC1ZtHP4Nko/9c8sbJUjfdR/63//p29j2wD34ERPdHp0HNT2t47rnn6N27d9IdYmWoko8OfJT278VKDsuTpVxKQClNWy91hZpqnuozqJAKGq5oyPq506Vu2A50X38rbnp+93RK90XH9h7YS2t7a4+njf97sSVog8VWW/QRq/d1rlALN6V6rzu1MzaqzeY1kpb8bQV+0e2OFwHn93xsvO7lmKG7kh9Yx2+DlRyWJ0u5lIBSvUZmqcp3qnmy8xXp3uuWtham/moqg+8b7PgcR0Kq5A3CVwGKD+ZnhtvkfEl4nJP0jpO/Fys5LE8W0EuAffkKKz6AD75vMNcuubZH/nnc8HFpp+sf7DpIa3ur43McdSPq+Mf2fwwH8vlxN1xIuO0feq6PLghTRk7JuINy8vdiJYflyXLoJaKULs4QJMly7slEy/miJYROpDrHcccdd3DPPfcktFVdUcWBkQdiv6e62IXT8yb291Je7BJ0xuB8bfL4k4W5PAZg6tSpPP744wn3+e1vf8v48eMTatIrpCLltUTtpKVJxurQjcH5ieWB1QNjPztdLbGmXw2qygUXXICIJATzl156CVVl/PjxQDgFEn3edBeGtvMmJh8W0E2gOQ2Qe/bvieXEu+efB1UPoqqiKuH+1RXVfHTvR4RCIVatWhVrf/PNN1FVzjnnnB6vkWlZATtvYvJlZYsmsJo2Njle5fBg10GmL5ueMi8dS5nsaIFZ0B75L+r999/nqKOOSvsa6Y4WbEq+cYMFdBNIqU6GDqoelHRSDkBre2vstu7L/I47bhyTb57c4zFtbW0cccQRjvo0sHpgxglBxuTDUi4mkFKlN/pW9Y2tcJjJvoP7+N7T30NEGDjwUI69qqqK/fv3o6qOg3nTxiY+OvBRj/bKUKWlWYxrLKCbkuPGQmXpZt86Oum5HZgJ7971bqzp1FNPpbOzk/3791NVVZXyocnUr6znQOeBHu1HHHaEpVmMayygm5Li1kJl6WZTJpt0M6h6UPgOLYQn/sSttXLJJZegqrzxxhuEQrl9ZVLtYHa270zabkwuLKCXAT8tzevWKoGpRuF7D+ylaWNTj+UDJldODgfyXx66b6+ze9G4oZFly5blsCWJ3FrewU+fpSk+C+gB57eled1aqCw6Co+NvCNa21sTtv/hhx9GRHjwlgcP3ekiqJ1dy+P/+bhr6RA3lnfw22dpis8CesC5vS52oUeIbi5UVjeijr5VfXu07zu4jxtuvQER4cYbb4y1//KXv0RV0ZWa06JfmfqS79oqtsa5/xT7iCpjQBeRU0Tk1bj/94jIDBEZKCLLRWRz5N8BBe2pyYmbS/MWY4To9kJlPbZzETAT9izfE2taunQpjRsambl7ZkG/eE5WiUwXAGyZZX/x4ogqY0BX1TdVdZSqjiK86Oc+YDFwO7BSVYcDKyO/mxLj5oi3GCPEglzhSIHHCOfINxy6bfXq1agqO4/b6doXL58RWaYAYMss+4sXR1TZplzGAG+ragvwFaAh0t4ATHCzY8YdyUa8gjBu+Lisn6tYI8R81zuP6uzspO1HbXAX8NdD7Z+6+VM0bmjk7LPPBtz74uU7IsvUD1tm2V+8OKLKNqBPAp6M/Hykqr4f+fkD4EjXemVcUzeijikjp8QuUAzha1s2NDe4WgoIpVOB0d7ejojQq1cvdn+4+9ANt4ZPdk4dM5X6lfWxfqZaWTHbL16+O4ZMAcDWOPcXL4
6oHC+fKyJVwHvAaaq6TUR2q2r/uNt3qWqPPLqITAOmAdTU1JzZ0uJsrWnjHreuWZruep5AQa71mY2dO3cyaNCgHu179uzh8MMPB5JvQ6q1ySukgi7tcrzeeL7X8bRrywaLm9e/LcTyuZcC61R1W+T3bSJydOTFjiY8t64HVZ2rqqNVdfSQIUOyeDnjFrdLAZONEL2swNi6dSsikhDMq6urY9Pzo8Ecko+iFU04gonq1M6sUif5jsgspRIsXhxRZTNC/y/gWVX9ZeT3+4FWVb1XRG4HBqrqd9M9h13gwhvFGPl5cZX5TZs2MWLEiIS2z3zmM2zcuDHljM5U/YTw+7G1bSshCSVdszzT++XGiMyuRGSScXWELiJ9gC8TLvqKuhf4sohsBi6O/G5KUDFGfsXMF65atQoRSQjm48ePR1V57bXX0k7PT9WfaLDuurOLLk2+A8p0ROPGiMytE8KmPDkK6Kr6saoOUtW2uLZWVR2jqsNV9WJV9e2iFKVyMq9QinHoV4ydxtNPP42IcMEFF8TabrzxRlSV3/72t671M5+dkwVk4ylVLdr/Z555phZT44ZGrZ1dqzJTtHZ2rTZuaEx6n96zeiszif3fe1bvpPc16Tl5v3Pxs5/9TAlXk8f+//GPf1ywftrfhCk1wBp1EGMDe5Fop/lMqywoXd///ve5997ETF5DQwPf+MY3Cv7alss+xN4L75X9RaKdVl0EbTp1ENJHdXV1iEhCMH/mmWdQVdeDear3y2nqJAjvdzq2IJi/BPYSdE4DdU2/mqQjdD9Op+5+VNL9MmqlTFX5+7//e1566aWE9ldeeYXRozMOTHKS7/vl5/fbqXQDo6BsY5AEdoTu9MRWkGp//bgaX0dHB0cddRShUCghmG/evBlVLVgwh/zfLz++39kK2hFs0AU2oDsN1KU2nTqfQ3g/ffn27duHiFBZWcm2bdti7R988AGqykknnVTwPuT7fjl5vN9TMrYgmL8ENuUSDchOTubUjagricPHfA/h/ZA+am1tZfDgwT3a46fnF0u+71emxwchJTNrzKykxQV+PIItB4EdoYP/aoLzPYQv5fRRS0sLIpIQzPv06cOBAwd6TM8vlnzfr0yPD0JKptSOYE16gR2h+1G+KYBsjkqKZcOGDYwcOTKhbcSIETQ3NyPSc/2UYsr3/cr0eD+lwNIplSNYk1lg69D9KEg18S+88AIXXnhhQtvll1/OkiVLPOpR8QXp8zTeKvs69GyVwsmrUk6ZOPXUU08hIgnB/Dvf+Q6qWlbBHILxeRp/sYBO6Uye8HO+8qGHHkJEuOqqq2Jt9913H6rKQw895GHPvOPnz9P4k6VcKPyhcZCnTn/ve9/jvvvuS2ibN28ekydP9qhHxgSPpVyyUMiTV4Ua/XudIpo0aRIikhDMn332WVTVgrkxHrGATmEnTxSidM2rFJGqcs455yAizJ8/P9a+Zs0aVJWxY8cW9PWNMelZQKewJ68KMfovdn1zR0cHQ4YMIRQK8fLLL8fa33rrLVSVP1X9yfMTysYYC+hAYU9eFWL0X6z65vjp+Tt27Ii1b9u2DVXlxBNPLIkTyl6nn3Lhxz6b0mcBPaL7rFLAlS9cIUb/hV5fo7W1FRGhT58+Ce0fffQRqsrQoUNjbV7PhiyFHUq2/Nhn4w8W0JNw8wtXiNF/oVJEW7Zs6TE9/4gjjohNz+/bt2+Px3g9G9LrHUou/Nhn4w9WtphEqc7wiy9/HFg9EICd7Tup6VfDuOHjWLp5aU6lkc3NzYwaNSqhbdSoUaxbty7j9Hyv36vQXSGUnn/DgtB1Z/KLPXvNj3023rKyxTx4PepMpvtRQ2t7K+0d7cy7ch6zxsyiobkh6yOK5557DhFJCOYTJkxAVVm/fr2jtVa8ng3px+Vd/dhn4w8W0JMoxS9cusP0bA/h58+fj4gwZsyYWNuMGTNQVRYvXpxVv7yeDen1DiUXfuyz8QcL6EkU+wvnpOIh3VGD0yOKOXPmICJMmj
Qp1vbTn/4UVWX27Nk599/LZYq93qHkwo99Nv5gOfQUijVdv/tFECC88+j+BU+XqwbS5rFvu+02/vVf/zXxdZua+PrXv+7WZhgXBXmpCJMbpzl0C+gec3pSMV3gB5Ledvr/nM7Lvzs0EQhg+fLlXHzxxS5vhXGL0x28KS9OA7pd4MJjTtMlTi7GUL+ynpbdLVQ9VsW+v+3jZQ4F83Xr1vG5z32uAFtg3JTufIgFdJOJjdA95lbZX0dHB0OHDmXXrl0J7e+88w4nnHBCvt00RWIljSYZK1v0iXxPwH788cex6fnxwXz79u2oqgVznynFCivjHxbQXZLr2hy5Vjzs2LEDEekxe3Pv3r2oKkOGDMl5W4x3stnB23owpjtLubigmCey/vKXv/DpT386oW3AgAFs27aNysrKtH20ygl/cPJZ2cnT8mJVLkVUjOnv69ev54wzzkhoO+OMM1izZk3GGZ35fPltR1CavF5ywRSX5dCLqJBLBaxcuRIRSQjmEydORFVZu3ato+n5uS4G5faqgJYicE8pLk9hvGcB3QWFOJH15JNPIiIJNeO33HILqsrChQuzeq5cv/xurgpoS8a6q1xOntogIDsW0F3g5lIBDzzwACKSMIvzgQceQFV7zPZ0Ktcvv5ujQFsy1l3lsB6MDQKyZwHdBW6szXHLLbcgItx6662xtieffBJV5eabb86rf7l++Z3sCJyOoCxF4K5yWA/GBgHZs5OiBeTkhOLEiRNZtGhRQtuKFSsSVkIsVl+SPSbdydRsTrbaSTyTLZtkdYhVuXgsXbD7+ulf54wzzuDVV19NeMz69et7XGjCa+l2BNkEaSuzM9myQcAhFtA9lvSPsRPkfkE/SXzP85me72VZYbYjKCuBNNmwQcAhri7OJSL9gf8ETgcUuBZ4E5gPDAO2AFep6q4UT+EbbgWdhNzwfuDH4R/jA+CHH36YcP3OXPoa/wcfPWkEFOUPvqZfTdIRVKrce92IurL7IprcOVmQziRyNEIXkQbg96r6nyJSBfQG/hnYqar3isjtwABV/V665yn0CD3fYOzmiGDYnGG0vNcC9/e8be/evfTp0yer50v5Gh4ektoIypjicG1ikYj0A84HHgNQ1QOquhv4CtAQuVsDMCH37ubPjRInt86qv/POO7Tc3C2Y94bqu6tp3NDoSjBv2tiUNJhD8SpHyqHSwhg/yThCF5FRwFzgdWAksBaYDryrqv0j9xFgV/T3VAo5QndjtJrvWfVk66xU1VRxYOoBavvXuna4mGxkHK8cTxoZE2Ru5tB7AWcA31HV1SLyIHB7/B1UVUUk6Z5BRKYB0wBqago3i82NOudsc8JRGzZsYOTIkQltvUf2Zu68woxWkx1JxF43YJNLjDHOOZlY9Dfgb6q6OvL7QsIBfpuIHA0Q+Xd7sger6lxVHa2qowu5pKsbU6GznYDz+9//HhFJDOZXADNh3xX7mLxoMoPvG+z6zLZ0O6kpI6dQv7Le0VRpm1ZtTLBkDOiq+gHwVxE5JdI0hnD65dfAlEjbFGBJQXrokBtToZ3mhH/zm98gIpx//vmxtqHXDYWZhJNScVrbW12frpxqJzWoehANzQ2OziPYtGpjgsdplcsowmWLVcA7wFTCO4MFQA3QQrhscWe65yn1KpdMHn/8caZOnZrQ9uKLL3LeeeelzL9HuZnXTlVdUt2rmtb2Vkev7XWFTCFZvbsJGlfr0FX1VSDZk7k7Pz1Phapzvv/++/nud7+b0LZp0yZOO+202O+p8u9R8WmSfANOqvrcaxZdk/G107Wla/cLr2vzjfGSLc6Vgqpy2223ISKxYN6nTx+2bNmCqiYEc0ie8okXTZO4leqoG1HHlhlb6Lqziy0ztlA3oi6r8whBXX7VFnQy5cwCejcdHR184xvfIBQKxZarPeGEE9i+fTt79+6ltrY2djJR7hJ63d0LuUuoX1nPlJFTGFQ9qMdzxufyCxlwsjmPENTlV4N65GGMExbQIz755BMuue
QSKisrmTdvHgDnnHMOe/bs4Z133olddDl+hA3QqZ1A+NC+obmBBy99kMYrG1OeWC1kwMlmok9QJwUF9cjDGCfKfnGutrY2vvSlLyWsfHj55ZezYMG98BpcAAAKuklEQVQCDjvssB73T3UyMSrTScUgn4wsBbYcgQmiwFxTtFC10h988AHHHHMM/fv3jwXz6667jo6ODpYsWZI0mEPmkXSm21Pl2vce2Ov7ksFSqGsP6pGHMU44qnLxSiEqFt5++21OOeUUOjs7Y2319fXcc889ji64nKmaJdOhfbTf05dNTygxjNarx9/HqVIo0yul6hJb1dGUq5Ieobt5ArG5uRkR4aSTTooF8zlz5qCq/PCHP3QUzCF9NYvTk4p1I+roW9W3R3su21YqE4SsusQY75V0QHfjBOKqVasQkYQrATU2NqKqTJ8+Pes+xR/SA1RIBUDWh/bZbFu6VEapBFKrLjHGeyWdcsl1sSyAJUuWMGFC4oq+S5cu5dJLL827X24c0jvdtkypDK8DaTTdk2qWrFWXGFM8JT1Cz6VW+he/+AUikhDM//jHP6KqrgRztzjdtkwjcC/L9LqXcHYXhLp2Y/ykpAN6NhULP/nJTxARvvnNb8baXnvtNVSVc889t5jddsTptmUagXs5QSjdMr5WXWJM8fm6Dl1VufXWW5k9e3as7fDDD2fTpk0FXXu9mNLVvdf2q02YgVrsKpd8LwhijHEmMHXoyXR0dDB58mRCoVAsmJ944ol8+OGH7NmzJzDBHNJX1cTn07uv61IMNivTmNLiq4De3t7O2LFjqayspKkpXOlx3nnn8dFHH/HWW28xePBgj3vovu5VNd1F8+leTOoJ6nowxviVL1IuHR0djB49mubm5ljbhAkTmD9/PlVVVW52saSlW3O9d2VvT6a7l8KkJmOCzmnKxRcBfdGiRUycOBGAadOm8cgjjxAK+ergwhWp8ukVUhFbJCyerQ9jTDAEKoc+fvx4Xn75Zbq6unj00UfLMphD6hRHsmAONqnHmHLji8h42GGHcdZZZzmenh9UqUodU+XX7eSkMeWlpGeKmp5SzVJNtmSsnZw0prz4YoRu0rMlY40x4JOTokFnlSLGmHScnhS1lIvHSmkdcWOMv1nKpQj8sPytMcb/bIReYKW+/K0xJjhshF5guS5/O7B6oOfX5zTG+IsF9ALLZfnbqooq9uzf4/ll5Ywx/lKWAb2YC1llWpEwWcnh4VWHc7DrYML9La9ujMmk7AJ6sS+q7GRFwroRdcwaM4uafjVsbdtKa3tr0ueyvLoxJp2yC+jFripxMumn+04mFZvKb4xJp+yqXLyoKsl0Uel0l3KLsqn8xphMym6EXopX2cm0M7Gp/MYYJ8ouoJfiVXbS7UwEKepl5Ywx/lV2Ab0UF7KaNWYWQvKlgS1vboxxquxy6JA5p11sdSPq+MPWP/DzNT9POCnq9ZGDMcZfym6EHs+LCyun8vD4h5l35bySOnIwxvhL2S6f232NFSjehZWNMSYbgbqmaCHYKofGmKAp24BuqxwaY4LGUUAXkS0islFEXhWRNZG2gSKyXEQ2R/4dUNiuuqsU69GNMSYf2YzQL1TVUXF5nNuBlao6HFgZ+d03SrEe3Rhj8pFPyuUrQEPk5wZgQv7dKZ5SrEc3xph8OKpyEZG/ALsABR5V1bkisltV+0duF2BX9PdUSqnKxRhj/MLti0R/QVXfFZGhwHIR+VP8jaqqIpJ0zyAi04BpADU1lp82xphCcZRyUdV3I/9uBxYDZwPbRORogMi/21M8dq6qjlbV0UOGDHGn18YYY3rIGNBFpI+IHB79GRgLbAJ+DUyJ3G0KsKRQnTTGGJOZk5TLkcDicJqcXsATqvqMiLwCLBCRbwItwFWF66YxxphMMgZ0VX0HGJmkvRUYU4hOGWOMyV7ZzhQ1xpigsYBujDEBYQHdGGMCwgK6McYEhAV0Y4wJCAvoJaSUrqBkjPGfsrymaCnqfgWllrYWpv1mGo
AtGGaMccRG6CXCrqBkjMmXBfQSYVdQMsbkywJ6ibArKBlj8mUBvUTYFZSMMfmygF4i7ApKxph8ObpikVvsikXGGJM9p1csshG6McYEhAV0Y4wJCAvoBrBZqsYEgc0UNTZL1ZiAsBG6sVmqxgSEBXSPlUKqw2apGhMMFtA9FE11tLS1oGgs1VHsoG6zVI0JBgvoHiqVVIfNUjUmGCyge6hUUh02S9WYYLAqFw/V9Kuhpa0laXux1Y2oswBujM/ZCN1DluowxrjJArqHLNVhjHGTLc5ljDElzhbnMsaYMmMB3RhjAsICujHGBIQFdGOMCQgL6MYYExBFrXIRkQ+BnjNpcjMY2OHSc5WaoG5bULcLgrttQd0u8Ne21arqkEx3KmpAd5OIrHFSxuNHQd22oG4XBHfbgrpdEMxts5SLMcYEhAV0Y4wJCD8H9Lled6CAgrptQd0uCO62BXW7IIDb5tscujHGmER+HqEbY4yJU/IBXUSOF5HnReR1EXlNRKZH2geKyHIR2Rz5d4DXfc1Wmm27X0T+JCIbRGSxiPT3uq/ZSrVtcbffKiIqIoO96mMu0m2XiHwn8rm9JiL3ednPXKT5exwlIi+JyKsiskZEzva6r9kQkU+JyMsi0hzZrrsi7SeIyGoReUtE5otIldd9zZuqlvT/wNHAGZGfDwf+DHwGuA+4PdJ+O/ATr/vq4raNBXpF2n8SpG2L/H488CzhOQmDve6rS5/ZhcAK4LDIbUO97quL2/Y74NJI+zjgBa/7muV2CdA38nMlsBo4F1gATIq0/xz4ttd9zff/kh+hq+r7qrou8vNHwBvAscBXgIbI3RqACd70MHeptk1Vf6eqHZG7vQQc51Ufc5XmcwOYDXwX8N0JnDTb9W3gXlXdH7ltu3e9zE2abVPgiMjd+gHvedPD3GjY3sivlZH/FbgIWBhp92UM6a7kA3o8ERkGfI7wHvZIVX0/ctMHwJEedcsV3bYt3rXAsmL3x03x2yYiXwHeVdVmTzvlgm6f2cnAFyOH8P8jImd52bd8ddu2GcD9IvJX4KfA973rWW5EpEJEXgW2A8uBt4HdcQOnv3FowOFbvgnoItIXeBqYoap74m/T8DGT70Z7Uam2TUTqgQ6gyau+5St+2whvyz8Dd3jaKRck+cx6AQMJH8r/P2CBiIiHXcxZkm37NnCzqh4P3Aw85mX/cqGqnao6ivDR7tnAqR53qSB8EdBFpJLwH1iTqi6KNG8TkaMjtx9NeM/rOym2DRH5P8BlQF1kh+U7SbbtROAEoFlEthD+cq0TkaO862X2UnxmfwMWRQ7vXwa6CK8V4isptm0KEP35KcIB0ZdUdTfwPHAe0F9EekVuOg5417OOuaTkA3pklPMY8IaqPhB3068J/6ER+XdJsfuWr1TbJiKXEM4xX66q+7zqXz6SbZuqblTVoao6TFWHEQ6CZ6jqBx52NStp/h5/RfjEKCJyMlCFfxZ+AtJu23vABZGfLwI2F7tv+RCRIdFKMRGpBr5M+PzA88BXI3fzZQzpruQnFonIF4DfAxsJj3ogfNi+mvBZ6hrC1RJXqepOTzqZozTb9hBwGNAaaXtJVa8vfg9zl2rbVHVp3H22AKNV1TeBL81ntgL4BTAKOADcpqrPedLJHKXZtj3Ag4TTSp8AN6jqWk86mQMR+Szhk54VhAexC1T1bhH5NPBfhFNl64HJ0ZPaflXyAd0YY4wzJZ9yMcYY44wFdGOMCQgL6MYYExAW0I0xJiAsoBtjTEBYQDfGmICwgG6MMQFhAd0YYwLi/wM6kA1aREXRBwAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from matplotlib import pyplot as plt\n", "%matplotlib inline\n", "\n", 
"plt.scatter(train_X['male_BMI'], train_y,color='g')\n", "# Series.reshape is deprecated (see the FutureWarning above); reshape the underlying ndarray\n", "# into a single-feature column, which is the 2-D input shape predict() is given here.\n", "plt.plot(train_X['male_BMI'], model2.predict(train_X['male_BMI'].values.reshape(-1, 1)),color='k')\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }