1922 lines
422 KiB
Plaintext
1922 lines
422 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Uczenie maszynowe UMZ 2017/2018\n",
|
|||
|
"# 2. Regresja logistyczna\n",
|
|||
|
"### Część 2"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 2.5. Regresja wielomianowa"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Wybór cech"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Zadanie: przewidzieć cenę działki o kształcie prostokąta.\n",
|
|||
|
"\n",
|
|||
|
"Jakie cechy wybrać?"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
" * $x_1$ – szerokość działki, $x_2$ – długość działki:\n",
|
|||
|
"$$ h_{\\theta}(\\vec{x}) = \\theta_0 + \\theta_1 x_1 + \\theta_2 x_2 $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
" * $x_1$ – powierzchnia działki:\n",
|
|||
|
"$$ h_{\\theta}(\\vec{x}) = \\theta_0 + \\theta_1 x_1 $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Regresja wielomianowa"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 50,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Przydatne importy\n",
|
|||
|
"\n",
|
|||
|
"import ipywidgets as widgets\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import pandas\n",
|
|||
|
"\n",
|
|||
|
"%matplotlib inline"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 51,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Przydatne funkcje\n",
|
|||
|
"\n",
|
|||
|
"# Wersja macierzowa funkcji kosztu\n",
|
|||
|
"def cost(theta, X, y):\n",
|
|||
|
" m = len(y)\n",
|
|||
|
" J = 1.0 / (2.0 * m) * ((X * theta - y).T * (X * theta - y))\n",
|
|||
|
" return J.item()\n",
|
|||
|
"\n",
|
|||
|
"# Wersja macierzowa gradientu funkcji kosztu\n",
|
|||
|
"def gradient(theta, X, y):\n",
|
|||
|
" return 1.0 / len(y) * (X.T * (X * theta - y)) \n",
|
|||
|
"\n",
|
|||
|
"# Algorytm gradientu prostego (wersja macierzowa)\n",
|
|||
|
"def gradient_descent(fJ, fdJ, theta, X, y, alpha=0.1, eps=10**-5):\n",
|
|||
|
" current_cost = fJ(theta, X, y)\n",
|
|||
|
" logs = [[current_cost, theta]]\n",
|
|||
|
" while True:\n",
|
|||
|
" theta = theta - alpha * fdJ(theta, X, y)\n",
|
|||
|
" current_cost, prev_cost = fJ(theta, X, y), current_cost\n",
|
|||
|
" if abs(prev_cost - current_cost) > 10**15:\n",
|
|||
|
" print('Algorithm does not converge!')\n",
|
|||
|
" break\n",
|
|||
|
" if abs(prev_cost - current_cost) <= eps:\n",
|
|||
|
" break\n",
|
|||
|
" logs.append([current_cost, theta]) \n",
|
|||
|
" return theta, logs\n",
|
|||
|
"\n",
|
|||
|
"# Wykres danych (wersja macierzowa)\n",
|
|||
|
"def plot_data(X, y, xlabel, ylabel): \n",
|
|||
|
" fig = plt.figure(figsize=(16*.6, 9*.6))\n",
|
|||
|
" ax = fig.add_subplot(111)\n",
|
|||
|
" fig.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9)\n",
|
|||
|
" ax.scatter([X[:, 1]], [y], c='r', s=50, label='Dane')\n",
|
|||
|
" \n",
|
|||
|
" ax.set_xlabel(xlabel)\n",
|
|||
|
" ax.set_ylabel(ylabel)\n",
|
|||
|
" ax.margins(.05, .05)\n",
|
|||
|
" plt.ylim(y.min() - 1, y.max() + 1)\n",
|
|||
|
" plt.xlim(np.min(X[:, 1]) - 1, np.max(X[:, 1]) + 1)\n",
|
|||
|
" return fig\n",
|
|||
|
"\n",
|
|||
|
"# Wykres funkcji fun\n",
|
|||
|
"def plot_fun(fig, fun, X):\n",
|
|||
|
" ax = fig.axes[0]\n",
|
|||
|
" x0 = np.min(X[:, 1]) - 1.0\n",
|
|||
|
" x1 = np.max(X[:, 1]) + 1.0\n",
|
|||
|
" Arg = np.arange(x0, x1, 0.1)\n",
|
|||
|
" Val = fun(Arg)\n",
|
|||
|
" return ax.plot(Arg, Val, linewidth='2')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 52,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Wczytanie danych (mieszkania) przy pomocy biblioteki pandas\n",
|
|||
|
"\n",
|
|||
|
"alldata = pandas.read_csv('data_flats.tsv', header=0, sep='\\t',\n",
|
|||
|
" usecols=['price', 'rooms', 'sqrMetres'])\n",
|
|||
|
"data = np.matrix(alldata[['sqrMetres', 'price']])\n",
|
|||
|
"\n",
|
|||
|
"m, n_plus_1 = data.shape\n",
|
|||
|
"n = n_plus_1 - 1\n",
|
|||
|
"Xn = data[:, 0:n]\n",
|
|||
|
"Xn /= np.amax(Xn, axis=0)\n",
|
|||
|
"Xn2 = np.power(Xn, 2) \n",
|
|||
|
"Xn2 /= np.amax(Xn2, axis=0)\n",
|
|||
|
"Xn3 = np.power(Xn, 3) \n",
|
|||
|
"Xn3 /= np.amax(Xn3, axis=0)\n",
|
|||
|
"\n",
|
|||
|
"X = np.matrix(np.concatenate((np.ones((m, 1)), Xn), axis=1)).reshape(m, n + 1)\n",
|
|||
|
"X2 = np.matrix(np.concatenate((np.ones((m, 1)), Xn, Xn2), axis=1)).reshape(m, 2 * n + 1)\n",
|
|||
|
"X3 = np.matrix(np.concatenate((np.ones((m, 1)), Xn, Xn2, Xn3), axis=1)).reshape(m, 3 * n + 1)\n",
|
|||
|
"y = np.matrix(data[:, -1]).reshape(m, 1)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Postać ogólna regresji wielomianowej:\n",
|
|||
|
"\n",
|
|||
|
"$$ h_{\\theta}(x) = \\sum_{i=0}^{n} \\theta_i x^i $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 53,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Funkcja regresji wielomianowej\n",
|
|||
|
"\n",
|
|||
|
"def h_poly(Theta, x):\n",
|
|||
|
" return sum(theta * np.power(x, i) for i, theta in enumerate(Theta.tolist()))\n",
|
|||
|
"\n",
|
|||
|
"def polynomial_regression(theta):\n",
|
|||
|
" return lambda x: h_poly(theta, x)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Funkcja kwadratowa:\n",
|
|||
|
"\n",
|
|||
|
"$$ h_{\\theta}(x) = \\theta_0 + \\theta_1 x + \\theta_2 x^2 $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 54,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"[<matplotlib.lines.Line2D at 0x7f39d752eb10>]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 54,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAFnCAYAAAA2ZPiEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xl8VNX9//HXmaxkYQ9b2CGCoAKy\niaJFraioBZcKapUqrRvWUmt/aGt3rdZvW6utO1rBFfcVte6IZQuIyGoCsoZNgjAJZJ3z++POmBAy\nySSZyZ3MvJ+PRx4Tzr1z74dEkw/n3M/nGGstIiIiIhI/PG4HICIiIiLNSwmgiIiISJxRAigiIiIS\nZ5QAioiIiMQZJYAiIiIicUYJoIiIiEicUQIoIiIiEmeUAIqIiIjEGSWAIiIiInEm0e0Aol3Hjh1t\n7969ASj49hB7i8vokJFMtzat3A1MRJrXtm2wa1fw4126QHZ288UjcaO80rJu5wEMcHTX1iR4jNsh\nSZRYtmzZN9barMa8VwlgPXr37k1ubi4AX27bz3n/XkD79GT+d+vpJCdqAlUkbsyaBTNmQHHxkcfS\n0+H222HatOaPS2Leo/M3cse8tZw1uAsPXT7c7XAkihhjNjf2vcpgGuCY7NYM6JxJYXEZH63f7XY4\nItKcJk8GT5AfmR6Pc1wkAl75fDsAk4ZphlnCRwlgAxhjuGh4dwBeXLbN5WhEpFllZsK8ec5reroz\nlp5eNZ6R4W58EpPW7/SyZscBWqcmcurARq30idRKS8ANNHFYN+56Zx0frdvNN0WldMxIcTskEWku\nY8dCQQHMnQv5+dC/vzPzp+RPIuTVFc7s3znHdSMlMcHlaCSWKAFsoE6ZqYw7KosP1u3mtRUFTBvb\nx+2QRKQ5ZWToWT9pFj6f5TX/8u/5Wv6VMNMScCNoGVhERCJt8deFFOwvIbttK0b0aud2OBJjlAA2\nwmlHd6JtWhJrdxxgdcF+t8MREZEY9Gq12T+PWr9ImCkBbISUxAQmDukGaBZQRETCr6S8knlf7gBg\n0rBuLkcjsUgJYCNdNLwHAK+tKKCswudyNCIiEks+XLcbb2kFx2a3oX+nTLfDkRikBLCR1BNQREQi\nRb3/JNKUADaSegKKiEgk7Csu4+P1u/EYOG9IV7fDkRilBLAJJg7rRoLHfNcTUEREpKne/HIH5ZWW\nsTlZdMpMdTsciVFKAJsg0BOwwmd5bUWB2+GIiEgMCFT/XqDlX4kgJYBNpGVgEREJly17D7Js8z7S\nkhMYP7iz2+FIDFMC2ETqCSgiIuES2PrtzMFdSEvWZl0SOUoAmyglMYFJQ51pes0CiohIY1lreXm5\n83tE1b8SaUoAwyCwDKyegCIi0liLNhayae9BurRO5aR+HdwOR2KcEsAwGNytNQO7OD0BP1ynnoAi\nItJwzy7ZAsDFI7qTmKBfzxJZ+i8sDNQTUEREmmJfcRnvrNqJMXDxyB5uhyNxQAlgmEwcmu30BFy/\nmz1e9QQUEZHQvfz5dsoqfZyck0X3dmluhyNxIGIJoDGmhzHmI2PMGmPMamPMz/3jfzDGbDfGrPB/\nTKj2nluNMfnGmPXGmDOrjZ/lH8s3xtxSbbyPMWaxf3yuMSbZP57i/3O+/3jv+u7RVFmZKZw6IItK\nn+U1fxWXiIhIfay1POdf/r10lGb/pHlEcgawAviltXYQcAIw3RgzyH/sHmvtUP/HPAD/sSnAYOAs\n4AFjTIIxJgG4HzgbGARcUu06f/Vfqz+wD5jmH58G7POP3+M/L+g9wvUXrr4MbK0N12VFRCSGLd+y\nj7zdRXTMSOH0o9X7T5pHxBJAa+0Oa+1y/+deYC1QV137ROA5a22ptfZrIB8Y5f/It9ZutNaWAc8B\nE40xBjgNeNH//tnApGrXmu3//EXgdP/5we4R
FqcN7Ey7tCTW7fSyuuBAuC4rIiIx7NklWwFnEiFJ\nxR/STJrlvzT/EuwwYLF/6AZjzEpjzOPGmHb+sWxga7W3bfOPBRvvAHxrra2oMX7YtfzH9/vPD3at\nmvFebYzJNcbk7tmzJ+S/Z3Kih4nqCSgiIiE6UFLOmyudrUSnqPhDmlHEE0BjTAbwEjDDWnsAeBDo\nBwwFdgB/j3QMDWWtfcRaO8JaOyIrK6tB763qCbhdPQFFRKROr60ooKTcx5i+HejdMd3tcCSORDQB\nNMYk4SR/T1trXwaw1u6y1lZaa33Ao1QtwW4Hqv/zp7t/LNj4XqCtMSaxxvhh1/Ifb+M/P9i1wibQ\nE3DfwXL1BBQRkaCstTy72Cn+mKLiD2lmkawCNsBjwFpr7T+qjXetdtr5wCr/568DU/wVvH2AHGAJ\nsBTI8Vf8JuMUcbxunSqLj4CL/O+fCrxW7VpT/Z9fBHzoPz/YPcL591ZPQBERqdeX2/ezZscB2qYl\ncebgLm6HI3EmkjOAJwGXA6fVaPlytzHmS2PMSuBU4BcA1trVwPPAGuAdYLp/prACuAF4F6eQ5Hn/\nuQAzgZuMMfk4z/g95h9/DOjgH78JuKWue4T7L66egCIiUp9A8ccFw7qTmhS2hhQiITFqV1K3ESNG\n2Nzc3Aa/7yezl/L+2t3cds7R/OTkvhGITEREWqri0gpG3fE+xWWVvPeLU8jpnOl2SNICGWOWWWtH\nNOa9qjePEPUEFBGRYN5cWUBxWSUjerVT8ieuUAIYIeoJKCIiwQSWf6eM6ulyJBKvlABGiHoCiohI\nbdbtPMCKrd+SmZrIOcd2rf8NIhGgBDCC1BNQRERqes4/+zdpaDatklX8Ie5QAhhB6gkoIiLVlZRX\n8vJyZ1VIvf/ETUoAI0g9AUVEpLq3V+3gQEkFx3Vvw+BubdwOR+KYEsAImzQsm0T1BBQREeDZxf7i\nj5Eq/hB3KQGMsI4ZKYwb0IlKn+W1FWHddU5ERFqQ/N1FLNlUSFpyAj8Y2s3tcCTOKQFsBoFl4Bdy\n1RNQRCRezV3q7Pv7gyHdyEhJrOdskchSAtgMThvYifbpyazf5eXzrd+6HY6IiDSz0opKXlrurAKp\n959EAyWAzSA50cPFI5xqr6cWbnY5GhERaW7vrdlFYXEZA7tkMqS7ij/EfUoAm8llo3tiDLy5cgeF\nxWVuhyMiIs0o0PvvklE9Mca4HI2IEsBm06N9GqcO6ERZpY/nc7e6HY6IiDSTLXsPsiD/G1ISPUzy\n7xAl4jYlgM3o8hN6AfD04s1U+lQMIiISD+bmOsUf5xzblTZpSS5HI+JQAtiMTjkqix7tW7G18BDz\nv9rjdjgiIhJh5ZU+XsgN7Pyh4g+JHkoAm1GCx3DZaGcW8MlFKgYREYl1H67bzW5vKf2y0hnZu53b\n4Yh8RwlgM7t4RA+SEz18tH43WwsPuh2OiIhE0HNLnOVfFX9ItFEC2Mzapydz7rFdsRaeXrzF7XBE\nRCRCCr49xCdf7SE5wcMFx3d3OxyRwygBdMFl/mKQ53O3UlpR6XI0IiISCc/nbsVnYfzgzrRPT3Y7\nHJHDKAF0wfE92zKoa2sKi8t4+8udbocjIiJhVumzPL+0qvefSLRRAugCYwyXj1ExiIhIrJqft4eC\n/SX0bJ/GmL4d3A5H5AhKAF0ycWg3MlMSWbZ5H6sL9rsdjoiIhFGg+GPyyB54PCr+kOijBNAlacmJ\nXDjceSj4qUUqBhERiRW7vSV8sHY3CR7DD4er+EOikxJAF/3IXwzy6ufbOVBS7nI0IiISDi8u20aF\nz/L9ozvRqXWq2+GI1EoJoIv6d8rgxH4dOFReycvLtrkdjoiINJHPZ3luiVP8oZ0/JJopAXRZYH/g\nJxdtxlrtDywi0pIt3LiXLYUHyW7bilNystwORyQoJYAu+/6gznRuncKGPcUs3LjX7XBERKQJnvUX\nf/xwRHcS
VPwhUUwJoMuSEjxMGeksEzylljAiIi1WYXEZ/129C49xtv0UiWZKAKPAJaN6kuAxvLt6\nF7sOlLgdjoiINMLLy7d
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5ac25410>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data(X2, y, xlabel='x', ylabel='y')\n",
|
|||
|
"theta_start = np.matrix([0, 0, 0]).reshape(3, 1)\n",
|
|||
|
"theta, logs = gradient_descent(cost, gradient, theta_start, X2, y)\n",
|
|||
|
"plot_fun(fig, polynomial_regression(theta), X)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Funkcja sześcienna:\n",
|
|||
|
"\n",
|
|||
|
"$$ h_{\\theta}(x) = \\theta_0 + \\theta_1 x + \\theta_2 x^2 + \\theta_3 x^3 $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 55,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[[ 397519.38046962]\n",
|
|||
|
" [ -841341.14146733]\n",
|
|||
|
" [ 2253713.97125102]\n",
|
|||
|
" [ -244009.07081946]]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoAAAAFnCAYAAAA2ZPiEAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xd8VFX+//HXmVRS6D30LoiAIqBi\nwYqo2BesuOK67qpf0S3q7v5Wt+q6u5a1sAULdlx7QVm7otKLdBM6hNBLepvz++POmBAyySSZyZ3y\nfj4eeUw49869HyYKH865n88x1lpEREREJH543A5ARERERJqXEkARERGROKMEUERERCTOKAEUERER\niTNKAEVERETijBJAERERkTijBFBEREQkzigBFBEREYkzSgBFRERE4kyi2wFEuvbt29tevXqF5dpb\n9xVxoLiczq1S6ZCREpZ7iEiIbNsGO3cGPt65M2RlNV88IoDXWtbsyMdrLf07ZpCalOB2SNKMFi9e\nvMda26Ex71UCWI9evXqxaNGisFx79ood/PSFJYzo0Zo3fnpSWO4hIiEyYwZMmwaFhUceS0+HP/4R\npk5t/rgkrj0/bzO/eXMlo3q15ZWbTnA7HGlmxpjNjX2vloBddNrADqQmeVi65QA7Dha7HY6I1GXS\nJPAE+CPT43GOizQjay3Pz3P+/r/6hJ4uRyPRRgmgi9KSEzl1gDNzO2dlnsvRiEidMjNh9mznNT3d\nGUtPrxrPyHA3Pok7izfvZ21ePu0zkhk/pLPb4UiU0RKwy849ugtzVu3k/ZV5XHdSb7fDEZG6jB0L\nubkwaxbk5EC/fs7Mn5I/ccFzvtm/Scd3JzlR8znSMEoAXXb6UR1JSjAs3LSPPQWltFcxiEhky8jQ\ns37iuj0FpcxesQOPgStG9XA7HIlC+ieDy1qmJjG2X3u8Fv63qo4KQxEREZ9XFm2lvNJy+qCOdGuT\n5nY4EoWUAEaAc4/uAsD7K3e4HImIiES6Sq/lhXlbALh6jIo/pHGUAEaAswZ3IsFj+Gb9Xg4Wlbsd\njoiIRLDP1u1i+4FierRN45T+jWoBJ6IEMBK0SU9mTJ+2VHgtH67RMrCIiATmb/1y1egeeDzG5Wgk\nWikBjBDjfcvAH2gZWEREAtiyt4jPvttNcqKHy0d2dzsciWJKACPEOUM6YQx8kb2HgtIKt8MREZEI\n9MKCzVgL5x/ThbbpyW6HI1FMCWCE6JiZysiebSir8PLJ2l1uhyMiIhGmpLyS/y7aBqj4Q5pOCWAE\n0TKwiIgE8v7KHewrLGNI15aM6N7a7XAkyikBjCDjj3a28vl07W6KyypdjkZERCLJc984xR/XjOmJ\nMSr+kKZRAhhBslq3YFi3VhSXV/L5d7vdDkdERCLEqtyDLNlygMzURCYO7+p2OBIDlABGGC0Di4hI\nTc/7Gj9femw30pK1i6s0nRLACHOubxn44zW7KK3QMrCISLw7VFLOm0u3Ayr+kNBRAhhherVPZ1Dn\nTPJLK/g6Z6/b4YiIiMveWLKd4vJKTujTjn4dM9wOR2KEEsAIpL2BRUQEwFrLc76dP645QbN/EjpK\nACPQuUOdZeAPV++kotLrcjQiIuKWeRv2kbOrgI6ZKZw1uJPb4UgMCVsCaIzpboz51Biz2hizyhhz\nm2/8XmPMdmPMMt/XhGrvudsYk2OMWWeMOafa+HjfWI4x5q5q472NMfN947OMMcm+8RTfr3N8x3vV\nd49I0r9jBn06pLO/qJz5G/e5HY6IiLjk+fnO7N/kUT1IStCcjYROOP9rqgB+Zq0dDIwBbjbGDPYd\ne8haO9z3NRvAd2wyMAQYDzxhjEkwxiQAjwPnAoOBK6pd5y++a/UD9gNTfeNTgf2+8Yd85wW8R/g+\ngsYxxnxfDKJlYBGR+LTrUAlzVuaR4DFcMUr7
/kpohS0BtNbusNYu8X2fD6wBsup4y4XAy9baUmvt\nRiAHGOX7yrHWbrDWlgEvAxcapwvm6cCrvvfPBC6qdq2Zvu9fBc7wnR/oHhHH/xzgnFU78Xqty9GI\niEhze3nhViq8lrOO6kSXVi3cDkdiTLPMJ/uWYEcA831DtxhjvjXGPGWMaeMbywK2VnvbNt9YoPF2\nwAFrbUWN8cOu5Tt+0Hd+oGvVjPdGY8wiY8yi3bvdacg8pGtLurVpwe78UhZv2e9KDCIi4o6KSi8v\nLXB6/6n4Q8Ih7AmgMSYDeA2YZq09BEwH+gLDgR3A38MdQ0NZa/9trR1prR3ZoUMHV2I4bBl4RZ4r\nMYiIiDs+XruLHQdL6NM+nRP7tnM7HIlBYU0AjTFJOMnfC9ba1wGstTuttZXWWi/wH6qWYLcD1R9y\n6OYbCzS+F2htjEmsMX7YtXzHW/nOD3StiDT++2XgPKzVMrCISLx43tf65Srt+ythEs4qYAM8Cayx\n1j5YbbxLtdMuBlb6vn8bmOyr4O0N9AcWAAuB/r6K32ScIo63rZMRfQpc5nv/FOCtatea4vv+MuAT\n3/mB7hGRRnRvTaeWKWw/UMy32w66HY6IiDSDjXsK+TJ7D6lJHi47tpvb4UiMCucM4EnANcDpNVq+\nPGCMWWGM+RYYB9wOYK1dBbwCrAY+AG72zRRWALcAc3AKSV7xnQtwJ3CHMSYH5xm/J33jTwLtfON3\nAHfVdY8wfgZN4vEYxg/xVwNrGVhEJB684Jv9mzisK63SklyORmKV0dJi3UaOHGkXLVrk2v2/Wb+X\nK/4zj17t0vj056dpKUBEJIaVlFcy+s8fc7C4nHduGcvQbq3cDkkimDFmsbV2ZGPeq66SEW5U77a0\nS09m094i1ublux2OiIiE0TvLczlYXM6w7q2V/ElYKQGMcAkew9lDnO1/tAwsIhK7qu/7e/XoHi5H\nI7FOCWAU8FcDf6BdQUREYtaizfv5dttB2qQlccGwrm6HIzFOCWAUOKFPO1qmJvLdzgLW7y5wOxwR\nEQmDGV9uAOCaMT1JTYq4XUolxigBjALJiR7OHOwsA3+gZWARkZizeW8h/1u9k+QED1dr5w9pBkoA\no4R/b+D3tQwsIhJznv5qE9bCxOFd6ZiZ6nY4EgeUAEaJk/u3Jz05gZXbD7F1X5Hb4YiISIgcLC7n\nlUXONvVTx/Z2ORqJF0oAo0RqUgLjBnUEtAwsIhJLZi3cQlFZJSf1a8dRXVq6HY7ECSWAUUTLwCIi\nsaWi0sszX20C4IaxfdwNRuKKEsAoctrADqQkeliy5QB5B0vcDkdERJro/ZV55B4soU+HdE4d0MHt\ncCSOKAGMIukpiZzi+wNiziotA4uIRDNr7fetX6aO7Y3Ho60+pfkoAYwy5x7dGYDZK7QMLCISzRZv\n3s9yX+PnS0Z0czsciTNKAKPMmYM7kZzoYcGmfWw/UOx2OCIi0khPzt0IwFWje9IiWY2fpXkpAYwy\nLVOTOGtwJ6yFN5dudzscERFphC17i5izKo+kBMO1avwsLlACGIUuPTYLgDeWbsda63I0IiLSUE9/\nvRGvhYnDsujYUo2fpfkpAYxCJ/fvQPuMZHJ2FbBi+0G3wxERkQY4VFLOKwvV+FncpQQwCiUleJg4\nzJkFfH2JloFFRKLJrAVbKSyr5MS+7RjcVY2fxR1KAKPUJb5l4LeX51Je6XU5GhERCUZFpZdnvt4E\nwA0na/ZP3KMEMEoN6dqSAZ0y2FdYxufrdrsdjoiIBOGDVXlsP1BMnw7pnDago9vhSBxTAhiljDFc\ncqzTN+r1pdtcjkZERIIx40un9cv1J6nxs7hLCWAUu2h4FsbAR6t3cbCo3O1wRESkDos372fZ1gO0\nTkvi0mPV+FncpQQwinVulcrYfu0pq/Ty7opct8MREZE6PDnX2fbtqtE91PhZXKcEMMpdPELVwCIi\nkW7rviI+
WOlv/NzL7XBElABGu3OGdCYtOYHFm/ezeW+h2+GIiEgtnvl6E14LFxzTlU5q/CwRQAlg\nlEtPSWT80Z0BzQKKiES
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5ef4add0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data(X3, y, xlabel='x', ylabel='y')\n",
|
|||
|
"theta_start = np.matrix([0, 0, 0, 0]).reshape(4, 1)\n",
|
|||
|
"theta, _ = gradient_descent(cost, gradient, theta_start, X3, y)\n",
|
|||
|
"plot_fun(fig, polynomial_regression(theta), X)\n",
|
|||
|
"\n",
|
|||
|
"print(theta)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Regresję wielomianową można potraktować jako szczególny przypadek regresji liniowej wielu zmiennych:\n",
|
|||
|
"\n",
|
|||
|
"$$ h_{\\theta}(x) = \\theta_0 + \\theta_1 x + \\theta_2 x^2 + \\theta_3 x^3 $$\n",
|
|||
|
"$$ x_1 = x, \\quad x_2 = x^2, \\quad x_3 = x^3, \\quad \\vec{x} = \\left[ \\begin{array}{c} x_0 \\\\ x_1 \\\\ x_2 \\\\ x_3 \\end{array} \\right] $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Uwaga praktyczna: przyda się normalizacja cech, szczególnie skalowanie!"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"W ten sposób możemy stosować również inne „pochodne” cechy, np.:\n",
|
|||
|
"\n",
|
|||
|
"$$ h_{\\theta}(x) = \\theta_0 + \\theta_1 x + \\theta_2 \\sqrt{x} $$\n",
|
|||
|
"$$ x_1 = x, \\quad x_2 = \\sqrt{x}, \\quad \\vec{x} = \\left[ \\begin{array}{c} x_0 \\\\ x_1 \\\\ x_2 \\end{array} \\right] $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Wielomianowa regresja logistyczna\n",
|
|||
|
"\n",
|
|||
|
"Podobne modyfikacje cech możemy również stosować dla regresji logistycznej."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def powerme(x1,x2,n):\n",
|
|||
|
" X = []\n",
|
|||
|
" for m in range(n+1):\n",
|
|||
|
" for i in range(m+1):\n",
|
|||
|
" X.append(np.multiply(np.power(x1,i),np.power(x2,(m-i))))\n",
|
|||
|
" return np.hstack(X)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"matrix([[ 1. , 0.36596696, -0.11214686],\n",
|
|||
|
" [ 0. , 0.4945305 , 0.47110656],\n",
|
|||
|
" [ 0. , 0.70290604, -0.92257983],\n",
|
|||
|
" [ 0. , 0.46658862, -0.62269739],\n",
|
|||
|
" [ 0. , 0.87939462, -0.11408015],\n",
|
|||
|
" [ 0. , -0.331185 , 0.84447667],\n",
|
|||
|
" [ 0. , -0.54351701, 0.8851383 ],\n",
|
|||
|
" [ 0. , 0.91979241, 0.41607012],\n",
|
|||
|
" [ 0. , 0.28011742, 0.61431157],\n",
|
|||
|
" [ 0. , 0.94754363, -0.78307311]])"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Wczytanie danych\n",
|
|||
|
"import pandas\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"\n",
|
|||
|
"alldata = pandas.read_csv('polynomial_logistic.tsv', sep='\\t')\n",
|
|||
|
"data = np.matrix(alldata)\n",
|
|||
|
"\n",
|
|||
|
"m, n_plus_1 = data.shape\n",
|
|||
|
"n = n_plus_1 - 1\n",
|
|||
|
"Xn = data[:, 1:]\n",
|
|||
|
"\n",
|
|||
|
"Xpl = powerme(data[:, 1], data[:, 2], n)\n",
|
|||
|
"Ypl = np.matrix(data[:, 0]).reshape(m, 1)\n",
|
|||
|
"\n",
|
|||
|
"data[:10]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Wykres danych (wersja macierzowa)\n",
|
|||
|
"def plot_data_for_classification(X, Y, xlabel, ylabel): \n",
|
|||
|
" fig = plt.figure(figsize=(16*.6, 9*.6))\n",
|
|||
|
" ax = fig.add_subplot(111)\n",
|
|||
|
" fig.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9)\n",
|
|||
|
" X = X.tolist()\n",
|
|||
|
" Y = Y.tolist()\n",
|
|||
|
" X1n = [x[1] for x, y in zip(X, Y) if y[0] == 0]\n",
|
|||
|
" X1p = [x[1] for x, y in zip(X, Y) if y[0] == 1]\n",
|
|||
|
" X2n = [x[2] for x, y in zip(X, Y) if y[0] == 0]\n",
|
|||
|
" X2p = [x[2] for x, y in zip(X, Y) if y[0] == 1]\n",
|
|||
|
" ax.scatter(X1n, X2n, c='r', marker='x', s=50, label='Dane')\n",
|
|||
|
" ax.scatter(X1p, X2p, c='g', marker='o', s=50, label='Dane')\n",
|
|||
|
" \n",
|
|||
|
" ax.set_xlabel(xlabel)\n",
|
|||
|
" ax.set_ylabel(ylabel)\n",
|
|||
|
" ax.margins(.05, .05)\n",
|
|||
|
" return fig"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnAAAAFpCAYAAAAcIhVtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3X9wHPd53/HPA4mgpyASi5RqK5Ro\nSSUaR1I6tIQqSc0JHf+KDDcixKiBVKWhE6bqr9SUKKeix23cUZNaSWfE0qnbWEUdOwlrI1FAih4z\nYWVJdoZTyzGkKBZpjwxYbmWJSsxQduYIpSDFe/rH7hKLwx1wB9zt7nf3/Zq5wd3uHu67d7d7z373\n+T5r7i4AAACEoy/vBgAAAKAzBHAAAACBIYADAAAIDAEcAABAYAjgAAAAAkMABwAAEBgCOAAAgMAQ\nwAEAAASGAA4AACAwF+fdgDxceumlftVVV+XdDAAAgAWeeuqpv3L3y5ZbrpIB3FVXXaWpqam8mwEA\nALCAmf3fdpbjFCoAAEBgCOAAAAACQwAHAAAQGAI4AACAwBDAAQAABIYADgAAIDCFCODM7BNm9h0z\nO95ivpnZR81sxsy+amY3pObtNLPp+LYzu1YDAADkoxABnKRPSrp5ifnvkTQU3+6S9N8kyczWS/qw\npB+RdJOkD5vZJT1tKQAAQM4KEcC5+59IemWJRbZL+h2PPCnp9WZ2uaSflPSou7/i7t+V9KiWDgQB\nAACCV4gArg0bJX079fjFeFqr6YuY2V1mNmVmU6dOnepZQwEAAHotlABu1dz9IXcfdvfhyy5b9hJj\n3Xxh6eDB6G870wEAAJYRSgD3kqQrU4+viKe1ml4chw5JO3ZI99wzH6y5R4937IjmA+gMB0YAKi6U\nAO6wpJ+LR6P+qKS/dveXJR2V9G4zuyQevPDueFpxjI5Ku3dL+/fPB3H33BM93r07mg+gMxwYAai4\ni/NugCSZ2aclvU3SpWb2oqKRpWskyd1/S9IRSSOSZiS9Kunn43mvmNl/kPSV+F/d7+5LDYbInpm0\nb190f//+6CZFwdu+fdH8InOPfgxHRxe2tdV0IAvpAyMp2pY4MAJQIeYVPNUwPDzsU1NT2b6ou9SX\n6vCs18MIfA4ejHo00gFnuhdxclK69da8W4kqSn8PE6EcGAEotNpcTRMnJjR9elpDG4Y0dt2YBtcO\nZvLaZvaUuw8vuxwBXAZC/qFpPOXb2NMRwjqgvEI9MAJQWMdeOKaRAyOqe12z52Y1sGZAfdanI3ce\n0dZNW3v++u0GcKHkwIWrMQCq1xfnxBVZcgo4aXNfH8EbiiHZttJC2KYAFFZtrqaRAyOqna1p9tys\nJGn23KxqZ6PpZ86eybmF8wjgeu3QocUBTzogCiHZOp3HlyB4Q55CPzBCbzA6Gas0cWJCda83nVf3\nuiaOT2TcotYI4HptdDTKE0sHPElANDkZRrI1PR0omjIcGKH7GJ2MVZo+PX2h563R7LlZzbwyk3GL\nWiOA6zWzKMm/sbeq1fSioacDRbTcgdH27fTEVBFlm7BKQxuGNLBmoOm8gTUD2rx+c8Ytao0ADkuj\npwNFtNyB0SOP0BNTReTsYpXGrhtTnzUPjfqsT2PXj2XcotYYhYqlUQcOIWL0dLUxOhmrEMooVAI4\nAOUUcvkerByfO7rgzNkzmjg+oZlXZrR5/WaNXT+mdf3rMnltArglEMABJdBO77BET0yV0POKEqAO\nHIByW27E4cGDjJ6uGnJ2USEEcADCtNSIw/e/X/riFxk9XTVlKNsEtIlTqADC1Srf6cd/XPrpn+Ya\nvgCCQw7cEgjggBJpNuJQYvR0uxhpDhQKOXAAyq/VVUKksAtoZ4mrFwBBIoADECauEtIdXL2g+7gm\nKzJAAAcgTIw47A6uXtB99GoiA+TAAQgTuVvd
xdULuod6dFiFdnPgLs6iMQDQdUk+W7vT0VqrXEIC\njZVJejWlKGhLRkkTvKGLOIUKAFVGLmFvpIO4BMEbuogADgCqjFzC3mjVq0lAjC4hgAOAKuPqBd1H\nryYyQA4cAFQZuYTd16pXU4qmb9vGe4tVI4ADAKCbkl7N9EjoJIjbto1eTXQFARwAAN1EryYyQA4c\nAABAYAjgAAAAAkMABwAAEJhCBHBmdrOZPWdmM2a2t8n8fWb2THz7hpl9LzXvfGre4WxbDgAAkL3c\nAzgzu0jSxyS9R9K1ku4ws2vTy7j7Pe6+xd23SPpNSZOp2X+TzHP3WzJreBbcpYMHF9cMajUdABAu\n9vnoQO4BnKSbJM24+/PuflbSZyRtX2L5OyR9OpOW5e3QIWnHjoWFH5MCkTt2UCEdAMqEfT46UIQA\nbqOkb6cevxhPW8TM3iTpakmPpya/zsymzOxJM2tZXMfM7oqXmzp16lQ32t17o6OLq3enq3tTSwgA\nyoN9PjoQWh242yU97O7nU9Pe5O4vmdk1kh43s2fd/ZuNT3T3hyQ9JEnDw8Nh9EM3Vu/evz+6n67u\nDQAoB/b56EAReuBeknRl6vEV8bRmblfD6VN3fyn++7ykL0h6S/ebmKP0Bp1gQwaAcmKfjzYVIYD7\niqQhM7vazPoVBWmLRpOa2ZslXSLpS6lpl5jZ2vj+pZLeKulrmbS6l9IJq0kXetrdd5PMCgBl1Gyf\nn86JA2K5n0J199fM7JckHZV0kaRPuPsJM7tf0pS7J8Hc7ZI+477gW/xDkj5uZnVFwegD7h5+AJck\nsr7//dHjj3504f2PfnT+KI2jMgAoh8act3375h9L7POxgHkFo/rh4WGfmprKuxmtpTdiqXUgNznJ\ndfUAoCwOHowO3tM5b+nfA/b5lWBmT7n78LLLEcAVlHt0qvSjH52flmzUUtRLNzrK0RgAlIV78317\nq+koJQK4JQQRwEnRRtuXSlOs19l4AQArQ4AYhHYDuCIMYkAzJLICALqJQsGlQgBXRI2JrPX64uKO\nIeIyMSiR2lxN40+P675H79P40+OqzdXybhKwNAoFlwqnUIuorImsZV0vVM6xF45p5MCI6l7X7LlZ\nDawZUJ/16cidR7R109a8mwe01jhITqJQcMFwCjVko6NRMPPgg1GXtvt82ZDJSWn79jB7rDj6QwnU\n5moaOTCi2tmaZs/NSpJmz82qdjaafubsmZxbCCwhtELBnLlpiQCuiMyinqhHHlmYr2AWBTl79oSZ\nr5DsOJIgrq9vYb2jou5AgJSJExOqe73pvLrXNXF8IuMWAR0ILb+avL2WCOCKrIw9VqEd/QENpk9P\nX+h5azR7blYzr8xk3CKgTSHmV5fxd7BLcr8SA5ZQxgsbtzr6C3V9UDlDG4Y0sGagaRA3sGZAm9dv\nzqFVQBsOHVp81iP9G7NtW/HykMv4O9glDGIIQVnqwS11mRg2RuSlw9pYtbmaNj64UbWzi0edDvYP\n6uS9J7Wuf10WLQc6E3IduLL8DraBQQxlEVq+wlJaHf0l3eOd5DKQ2Ipu6TDHZnDtoI7ceUSD/YMa\nWDMgKep5G+yPphO8obCS/OrGwKfV9KIo0+9gN7l75W433nijB6Fed9+9212K/jZ7HJJ63X1ycnG7\nW01fyuTk4vch/f5MTnav3Si3FW5ntbmajz817nsf3evjT417ba6WccOBCijb72AbJE15G7FM7sFU\nHrdgAjiClNYquFGjh9Lfn+TG9wjIXwV/B9sN4MiBKzIPOF8hC05BSnSRd5hjw/YJ9F4FtzNy4Mog\n1HyFrFCSBN3iK8ixoT4V0Hv8DrZEAIdwreRHF2iU7sntpDYW9akA5Ig6cAhT449luiSJRE8c2rfS\n2ljUpwKQI3LgEKaDB6PTVOkfy3RQNzlZvIKUKKbV5th0mjsHAEsgBw7lNjoaBWnpno6kR2RyktNX\naN9qcmw4
jQ8gJwRwCBOJrcjbSnPnAKALyIEDgJUI8bqSAEqDAA4AViI5jZ/OkUuCuG3bMjuNX5ur\naeLEhKZPT2tow5DGrhv
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a65818dd0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data_for_classification(Xpl, Ypl, xlabel=r'$x_1$', ylabel=r'$x_2$')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Propozycja hipotezy:\n",
|
|||
|
"\n",
|
|||
|
"$$ h_\\theta(x) = g(\\theta^T x) = g(\\theta_0 + \\theta_1 x_1 + \\theta_2 x_2 + \\theta_3 x_3 + \\theta_4 x_4 + \\theta_5 x_5) \\; , $$\n",
|
|||
|
"\n",
|
|||
|
"gdzie $g$ – funkcja logistyczna, $x_3 = x_1^2$, $x_4 = x_2^2$, $x_5 = x_1 x_2$."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def safeSigmoid(x, eps=0):\n",
|
|||
|
" y = 1.0/(1.0 + np.exp(-x))\n",
|
|||
|
" if eps > 0:\n",
|
|||
|
" y[y < eps] = eps\n",
|
|||
|
" y[y > 1 - eps] = 1 - eps\n",
|
|||
|
" return y\n",
|
|||
|
"\n",
|
|||
|
"def h(theta, X, eps=0.0):\n",
|
|||
|
" return safeSigmoid(X*theta, eps)\n",
|
|||
|
"\n",
|
|||
|
"def J(h,theta,X,y, lamb=0):\n",
|
|||
|
" m = len(y)\n",
|
|||
|
" f = h(theta, X, eps=10**-7)\n",
|
|||
|
" j = -np.sum(np.multiply(y, np.log(f)) + \n",
|
|||
|
" np.multiply(1 - y, np.log(1 - f)), axis=0)/m\n",
|
|||
|
" if lamb > 0:\n",
|
|||
|
" j += lamb/(2*m) * np.sum(np.power(theta[1:],2))\n",
|
|||
|
" return j\n",
|
|||
|
"\n",
|
|||
|
"def dJ(h,theta,X,y,lamb=0):\n",
|
|||
|
" g = 1.0/y.shape[0]*(X.T*(h(theta,X)-y))\n",
|
|||
|
" if lamb > 0:\n",
|
|||
|
" g[1:] += lamb/float(y.shape[0]) * theta[1:] \n",
|
|||
|
" return g\n",
|
|||
|
"\n",
|
|||
|
"def classifyBi(theta, X):\n",
|
|||
|
" prob = h(theta, X)\n",
|
|||
|
" return prob"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Metoda gradientu prostego dla regresji logistycznej\n",
|
|||
|
"def GD(h, fJ, fdJ, theta, X, y, alpha=0.01, eps=10**-3, maxSteps=10000):\n",
|
|||
|
" errorCurr = fJ(h, theta, X, y)\n",
|
|||
|
" errors = [[errorCurr, theta]]\n",
|
|||
|
" while True:\n",
|
|||
|
" # oblicz nowe theta\n",
|
|||
|
" theta = theta - alpha * fdJ(h, theta, X, y)\n",
|
|||
|
" # raportuj poziom błędu\n",
|
|||
|
" errorCurr, errorPrev = fJ(h, theta, X, y), errorCurr\n",
|
|||
|
" # kryteria stopu\n",
|
|||
|
" if abs(errorPrev - errorCurr) <= eps:\n",
|
|||
|
" break\n",
|
|||
|
" if len(errors) > maxSteps:\n",
|
|||
|
" break\n",
|
|||
|
" errors.append([errorCurr, theta]) \n",
|
|||
|
" return theta, errors"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"theta = [[ 1.59558981]\n",
|
|||
|
" [ 0.12602307]\n",
|
|||
|
" [ 0.65718518]\n",
|
|||
|
" [-5.26367581]\n",
|
|||
|
" [ 1.96832544]\n",
|
|||
|
" [-6.97946065]]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Uruchomienie metody gradientu prostego dla regresji logistycznej\n",
|
|||
|
"theta_start = np.matrix(np.zeros(Xpl.shape[1])).reshape(Xpl.shape[1],1)\n",
|
|||
|
"theta, errors = GD(h, J, dJ, theta_start, Xpl, Ypl, \n",
|
|||
|
" alpha=0.1, eps=10**-7, maxSteps=10000)\n",
|
|||
|
"print(r'theta = {}'.format(theta))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Wykres granicy klas\n",
|
|||
|
"def plot_decision_boundary(fig, theta, X):\n",
|
|||
|
" ax = fig.axes[0]\n",
|
|||
|
" xx, yy = np.meshgrid(np.arange(-1.0, 1.0, 0.02),\n",
|
|||
|
" np.arange(-1.0, 1.0, 0.02))\n",
|
|||
|
" l = len(xx.ravel())\n",
|
|||
|
" C = powerme(xx.reshape(l, 1), yy.reshape(l, 1), n)\n",
|
|||
|
" z = classifyBi(theta, C).reshape(int(np.sqrt(l)), int(np.sqrt(l)))\n",
|
|||
|
"\n",
|
|||
|
" plt.contour(xx, yy, z, levels=[0.5], lw=3);"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/home/pawel/.local/lib/python2.7/site-packages/matplotlib/contour.py:967: UserWarning: The following kwargs were not used by contour: 'lw'\n",
|
|||
|
" s)\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnAAAAFpCAYAAAAcIhVtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xd8U+X+B/DP0wmUIrSUVaZSQEBk\nFBVFUUEFHBQEKqJevTju/XkvSxQQt+IWrFflotXrQq2jDBVBBFGQIWXJElo2lF1GWkpH8v39kQRC\nmrRpm+SMfN6vV15Nzjnp+Xacc755zvd5HiUiICIiIiLjCNM6ACIiIiKqHCZwRERERAbDBI6IiIjI\nYJjAERERERkMEzgiIiIig2ECR0RERGQwTOCIiIiIDIYJHBEREZHBMIEjIiIiMpgIrQPQQv369aVl\ny5Zah0FERER0ntWrVx8VkYSKtgvJBK5ly5bIysrSOgwiIiKi8yildvuyHW+hEhERERkMEzgiIiIi\ng2ECR0RERGQwTOCIiIiIDIYJHBEREZHBMIEjIiIiMhgmcEREREQGo4sETin1oVLqsFJqo5f1Sin1\nllIqRyn1p1Kqq8u6vymlsh2PvwUvaiIiIiJt6CKBA/ARgL7lrO8HIMnxeBDANABQSsUBeBrA5QAu\nA/C0UqpeQCMlIiIi0pguEjgR+Q1AXjmbDADwiditAFBXKdUYwE0AFohInogcB7AA5SeCRERERIan\niwTOB4kA9rq83udY5m15GUqpB5VSWUqprCNHjgQs0DJEgJkz7V99WU5ERERUAaMkcNUmIu+JSLKI\nJCckVDhHrP/MmgUMGgSMGXMuWROxvx40yL6eiIiIqBKMksDtB9DM5XVTxzJvy/UjJQUYNQpISzuX\nxI0ZY389apR9PRFVDlu2iSjEGSWBmwPgHkdv1CsAnBSRAwDmA7hRKVXP0XnhRscy/VAKmDr1XBIX\nFnYueZs61b6eiCqHLdtEFOJ0kcAppb4AsBxAW6XUPqXUCKXUP5RS/3BsMhfADgA5AN4H8H8AICJ5\nAJ4HsMrxeM6xTF+cSZwroyRvbOkgPWLLNhGFuAitAwAAERlWwXoB8LCXdR8C+DAQcfmN8+LiaswY\nYyRxzpYO1xZD14tlZiYwcKDWUVKocf1QlJZmfwBs2SYiv7AUWZCxKQPZx7KRFJ+E1A6piI2O1Tqs\n8ygJwRaU5ORkycrKCs7O3FsGpk4t+1rPFxujx0/mJmIvS3Cy2fj/SETVsnTPUvSf0R82saGgpAAx\nkTEIU2GYO3wuejbvGfD9K6VWi0hyRdvp4haqqc2aVTbZca2J03utDmv4SK+8tWyH4IdSIvIPS5EF\n/Wf0h6XYgoKSAgBAQUkBLMX25fnF+RpHeA4TuEBLSbHfZnRNdpxJUWamMWp1jFzDR+bk3jJss5Wt\niaPQw5pdqqaMTRmwic3jOpvYkLExI8gReccELtCUsteIuSc73pbrEVs6SG+M3rJNgcHeyVRN2cey\nz7a8uSsoKUBOXk6QI/KOCRyVjy0dpEcVtWwPGMCWmFDE3slUTUnxSYiJjPG4LiYyBq3jWgc5Iu+Y\nwFH52NJBelRRy/bs2WyJCUWs2aVqSu2QijDlOTUKU2FI7Zga5Ii8Yy9UKp+I/WKXknL+yc/bciI9\nYO/p0MbeyVQNRumFygSOiMzJNYlzYvJmfvy7kx/kF+cjY2MGcvJy0DquNVI7pqJ2VO2g7JsJXDmY\nwBGZgC+twwBbYkIJW17JBDgOHBGZW0U9DmfOZO/pUMOaXQohTOCIyJjK63E4ciTw66/sPR1qzDDu\nJpGPeAuViIzLW73TNdcAt9/OOXyJyHBYA1cOJnBEJuKpxyHA3tNEZEisgSMi8/M2Swhg/BlQgoXT\nTxEZEhM4IjImzhLiH5x+isiQmMARkTGxx6F/
cPop/2OrJgUBa+CIyJg4S4j/cPBb/5o50956yU40\nVAXsxFAOJnBERG44/ZT/cEBhqgZ2YiAiIt946wwSgh/w/cL9dn5YGJM38jsmcEREoYydQQLDmcS5\nYvJGfsQEjogolLEzSGCwVZMCjAkcEVEo4/RT/sdWTQqCCK0DICIiDTkHN/Z1OVXMW6smYF/eqxd/\nt1RtTOCIiIj8ydmq6TqUjTOJ69WLrZrkF0zgiIiI/ImtmhQErIEjIiIiMhgmcEREREQGwwSOiIiI\nyGB0kcAppfoqpbYqpXKUUhM8rJ+qlFrneGxTSp1wWWd1WTcnuJETERERBZ/mCZxSKhzAOwD6AWgP\nYJhSqr3rNiIyRkQ6i0hnAP8BkOmyutC5TkRuC1rgwSBinxTZfcwgb8uJiMi4eM6nStA8gQNwGYAc\nEdkhIsUAvgQwoJzthwH4IiiRaW3WLGDQoPMHfnQOEDloEEdIJyIyE57zqRL0kMAlAtjr8nqfY1kZ\nSqkWAFoBWOSyuIZSKksptUIp5XVwHaXUg47tso4cOeKPuAMvJaXs6N2uo3tzLCEiIvPgOZ8qwWjj\nwN0B4BsRsbosayEi+5VSFwJYpJTaICLb3d8oIu8BeA8AkpOTjdEO7T56d1qa/bnr6N5ERGQOPOdT\nJeihBW4/gGYur5s6lnlyB9xun4rIfsfXHQAWA+ji/xA15HpAO/FAJiIyJ57zyUd6SOBWAUhSSrVS\nSkXBnqSV6U2qlGoHoB6A5S7L6imloh3P6wO4CsDmoEQdSK4Fq84mdFejR7OYlYjIjDyd811r4ogc\nNL+FKiKlSql/AZgPIBzAhyKySSn1HIAsEXEmc3cA+FLkvP/iiwFMV0rZYE9GXxYR4ydwzkLWkSPt\nr9966/znb7117lMaP5UREZmDe83b1KnnXgM859N5lIRgVp+cnCxZWVlah+Gd60EMeE/kMjM5rx4R\nkVnMnGn/8O5a8+Z6PeA5PyQopVaLSHKF2zGB0ykR+63St946t8x5UAP2VrqUFH4aIyIyCxHP53Zv\ny8mUmMCVwxAJHGA/aMNcyhRtNh68RERUNUwQDcHXBE4PnRjIExayEhGRP3GgYFNhAqdH7oWsNlvZ\nwR2JiIgqgwMFm4rmvVDJg1mzzu+F5D64Y69exixkZfM9mYilyIKMTRnIPpaNpPgkpHZIRWx0rNZh\nEXnHgYJNhTVwemTWRIc9rMgklu5Ziv4z+sMmNhSUFCAmMgZhKgxzh89Fz+Y9tQ6PqHysr9Y11sAZ\nmVLnEhnngL7lLTcKNt+TCViKLOg/oz8sxRYUlBQAAApKCmApti/PL87XOEKichitvtp1YHtflocQ\nJnB6ZraCU2fzvTOJCwsre6uYSOcyNmXAJjaP62xiQ8bGjCBHROQjI9ZXm+066EdM4PTMjC1WnOeP\nDC77WPbZljd3BSUFyMnLCXJERD7yVl/tvM7oMRky43XQT9iJQc/MWHDqrfneqD8PhZyk+CTERMZ4\nTOJiImPQOq61BlER+SAlxV5r7FpH7bzO9Oqlz2TIjNdBP2EnBiMwS8FpefP88WAkrVSy05ClyILE\nKYmwFFvKfKvYqFjkPpKL2lG1gxE5Uegwy3XQB+zEYBZGKzgtjz+b71nYSv5SyRqb2OhYzB0+F7FR\nsYiJjAFgb3mLjbIvZ/JG5Gdmug76k4iE3KNbt25iCDabyKhRIoD9q6fXRmKziWRmlo3b2/LyZGaW\n/T24/n4yM/0XN5lbFY8zS5FF0leny4QFEyR9dbpYiixBDpwoBJjtOugDAFniQy6jeTKlxcMwCRyT\nFO9C8KCmAHL9/3E++H9EpL0QvA76msCxBk7PxKQD+vqLuNTUObGWjqpKKlljw+OTKPBC8DhjDZwZ\nOAfudf/n9LY81HBIEvIXqUKNDcenIgo8Xge9YgJHxlWViy6RO9eW3MoMbsrxqYhIQxwHjozJ/WLp\nOiQJwJY4
8p233tGAfXmvXp7n6OX4VESkIdbAkTHNnGm/TeV6sXRN6jIzPV90idxVt8amsrVz5bCW\nWlFw6jROnyp0PE7jzOl
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5ad7fc90>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data_for_classification(Xpl, Ypl, xlabel=r'$x_1$', ylabel=r'$x_2$')\n",
|
|||
|
"plot_decision_boundary(fig, theta, Xpl)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Wczytanie danych\n",
|
|||
|
"\n",
|
|||
|
"alldata = pandas.read_csv('polynomial_logistic.tsv', sep='\\t')\n",
|
|||
|
"data = np.matrix(alldata)\n",
|
|||
|
"\n",
|
|||
|
"m, n_plus_1 = data.shape\n",
|
|||
|
"Xn = data[:, 1:]\n",
|
|||
|
"\n",
|
|||
|
"n = 10\n",
|
|||
|
"Xpl = powerme(data[:, 1], data[:, 2], n)\n",
|
|||
|
"Ypl = np.matrix(data[:, 0]).reshape(m, 1)\n",
|
|||
|
"\n",
|
|||
|
"theta_start = np.matrix(np.zeros(Xpl.shape[1])).reshape(Xpl.shape[1],1)\n",
|
|||
|
"theta, errors = GD(h, J, dJ, theta_start, Xpl, Ypl, \n",
|
|||
|
" alpha=0.1, eps=10**-7, maxSteps=10000)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnAAAAFpCAYAAAAcIhVtAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xd4U9UbB/Dv6aSUssoQypaKMmRV\nREFwIGJRGaJVcaMo/pSpAg5wg6BgcWtxo9TRImoFERUpglCGCAi0rAIFWspKS+lI3t8fSSC0aZuW\nJPfe5Pt5njxN7r1p3qa597459z3nKBEBERERERlHgNYBEBEREVHVMIEjIiIiMhgmcEREREQGwwSO\niIiIyGCYwBEREREZDBM4IiIiIoNhAkdERERkMEzgiIiIiAyGCRwRERGRwQRpHYAWGjRoIK1atdI6\nDCIiIqKzrF279rCINKxsO79M4Fq1aoW0tDStwyAiIiI6i1Jqjyvb8RIqERERkcEwgSMiIiIyGCZw\nRERERAbDBI6IiIjIYJjAERERERkMEzgiIiIig2ECR0RERGQwukjglFIfKaWylVKbylmvlFJzlFIZ\nSqmNSqluDuvuUUql2273eC9qIiIiIm3oIoED8AmAARWsvx5AtO02EsC7AKCUqg9gKoBLAfQAMFUp\nVc+jkRIRERFpTBcJnIj8CeBIBZsMAvCZWK0CUFcp1QTAdQCWiMgRETkKYAkqTgSJiIiIDE8XCZwL\nogDsdXi8z7asvOVlKKVGKqXSlFJpOTk5Hgu0DBEgOdn605XlRERERJUwSgJ3zkTkAxGJEZGYhg0r\nnSPWfRYsAIYOBcaNO5OsiVgfDx1qXU9ERERUBUZJ4PYDaO7wuJltWXnL9WPwYGDMGCA+/kwSN26c\n9fGYMdb1RFQ1bNkm8ls7/tmNpPifYDabtQ5FU0ZJ4BYCuNvWG7UngOMicgDAYgD9lVL1bJ0X+tuW\n6YdSwOzZZ5K4gIAzydvs2db1RFQ1bNkm8ltLv/gT7477BGMufxo7/tmtdTia0UUCp5T6CsBKAO2U\nUvuUUiOUUg8rpR62bZICYCeADAAfAngEAETkCIAXAayx3V6wLdMXexLnyCjJG1s6SI/Ysk3ktx6c\ncRcmzxuDQ3sO45GYiZg7eR4KCwq1DsvrlPjhCTgmJkbS0tK894KOJxc7o7TAJSdbWzQc43X8e5KS\ngCFDtI6S/JGR9ysiOmcnjpjw4ROfY9HHv6Pp+Y0xPmEUOvft4JbfbSo0IXFzItJz0xEdGY24DnGI\nCI1wy++ujFJqrYjEVLodEzgPK90yMHt22cd6PtkYPX7ybSLWsgQ7i4WfRyI/s/63fzF75Ps4sPMQ\nbhx1HR6YPhw1I8Kq/ftSM1MROy8WFrEgvzgf4cHhCFABSBmegt4tersxcueYwFXAqwmcL7RgsaWD\n9IifSyKyKcg/hU+emY/kOSlo3LIBxn04Ct2u6VTl32MqNCFqVhRMRaYy6yJCIpA1IQu1Qmq5I+Ry\nuZrA6aIGzqcNHmxN0hxPKvaauKQkY9TqGLmGj3xT6ZZhi6VsTRz5H9bs+q2w8BoYNftezPrzBQSF\nBGHitS8gftQHKMgrqNLvSdycCItYnK6ziAWJmxLdEa5bMIHzNKWsLWylk53yluuR/WTpiCdJ0tKC\nBWUv4zv29mYvVP/E3sl+r2OvC/He+pkYNv5G/PTBr3i46xPYtGKry89Pz01HfnG+03X5xfnIOJLh\nrlDPGRM4qhhbOkiPKmvZHjSILTH+iL2TCUBoWCgeeu1uvPb7c7BYBBP6TsGHE79AUWFxpc+NjoxG\neHC403XhweFoW7+tu8OtNtbAUcV8oYaP/A8/t/6LtZHk4KSpAO9P+BQpCUvRulMLTJ43Bq07tih3\neyPVwDGBo4qJWC87DB589sGvvOVEesDe0/6N
vZOplL9/WovXRryL/OMnMXLmXRj0vwFQ5Xwm2AtV\nx5jAEfkBtsT4J/7fqRxHDx3DayPeweqU9egR2xWPz30E9RrXdbptXlEeEjclIuNIBtrWb4u4jnEe\nb3mzYwJXASZwRD7AldZhgC0x/oQtr1QJEcH3by/CB098jvA6NTHp88fQ/drOWod1Fg4jQkS+rbIe\nh8nJ7D3tb9g7mSqhlMLgR6/H22umo27D2pg84GV8MmU+zGaz1qFVGRM4IjKminocjh4NLFvG3tP+\nxhfG3SSvaN2xBeasegXX3tMX8176DhOvfRG5B45qHVaV8BIqERlXefVOffoAN9/MXqhEVKnFn/yO\nN/+XgLCIMLy79lU0iIrUNB7WwFWACRyRD3HW4xBg72kictnuzXvx25fLcd9Lt5fbO9VbWANHRL6v\nvFlCAOPPgOItnH6KCK06NMf9L9+hefJWFUzgiMiYOEuIe3D6KSJDYgJHRMbEHofuwemn3I+tmuQF\nrIEjImPiLCHuw8Fv3YtTudE5YCeGCjCBIyIqhdNPuQ8HFKZzwE4MRETkmvI6g/jhF3y3KH05PyCA\nyRu5HRM4IiJ/xs4gnmFP4hwxeSM3YgJHROTP2BnEM9iqSR7GBI6IyJ9x+in3Y6smeUGQ1gEQEZGG\n7IMbu7qcKldeqyZgXd63L99bOmdM4IiIiNzJ3qrpOJSNPYnr25etmuQWTOCIiIjcia2a5AWsgSMi\nIiIyGCZwRERERAbDBI6IiIjIYHSRwCmlBiiltimlMpRSk5ysn62U2mC7bVdKHXNYZ3ZYt9C7kRMR\nERF5n+YJnFIqEMDbAK4H0B7A7Uqp9o7biMg4EekiIl0AvAkgyWF1gX2diNzktcC9QcQ6KXLpMYPK\nW05ERMbFYz5VgeYJHIAeADJEZKeIFAGYD2BQBdvfDuArr0SmtQULgKFDzx740T5A5NChHCGdiMiX\n8JhPVaCHBC4KwF6Hx/tsy8pQSrUE0BrAbw6Layil0pRSq5RS5Q6uo5QaadsuLScnxx1xe97gwWVH\n73Yc3ZtjCRER+Q4e86kKjDYO3G0AvhURs8OyliKyXynVBsBvSql/RWRH6SeKyAcAPgCAmJgYY7RD\nlx69Oz7eet9xdG8iIvINPOZTFeihBW4/gOYOj5vZljlzG0pdPhWR/bafOwH8AaCr+0PUkOMObccd\nmYjIN/GYTy7SQwK3BkC0Uqq1UioE1iStTG9SpdSFAOoBWOmwrJ5SKtR2vwGAXgC2eCVqT3IsWLU3\noTsaO5bFrEREvsjZMd+xJo7IRvNLqCJSopR6FMBiAIEAPhKRzUqpFwCkiYg9mbsNwHyRsz7FFwF4\nXyllgTUZnS4ixk/g7IWso0dbH8+Zc/b9OXPOfEvjtzIiIt9QuuZt9uwzjwEe8+ksSvwwq4+JiZG0\ntDStwyif404MlJ/IJSVxXj0iIl+RnGz98u5Y8+Z4PuAx3y8opdaKSEyl2zGB0ykR66XSOXPOLLPv\n1IC1lW7wYH4bIyLyFSLOj+3lLSefxASuAoZI4ADrThvgUKZosXDnJSKi6mGCaAiuJnB66MRAzrCQ\nlYiI3IkDBfsUJnB6VLqQ1WIpO7gjERFRVXCgYJ+ieS9UcmLBgrN7IZUe3LFvX2MWsrL5nnyIqdCE\nxM2JSM9NR3RkNOI6xCEiNELrsIjKx4GCfQpr4PTIVxMd9rAiH5GamYrYebGwiAX5xfkIDw5HgApA\nyvAU9G7RW+vwiCrG+mpdYw2ckSl1JpGxD+hb0XKjYPM9+QBToQmx82JhKjIhvzgfAJBfnA9TkXV5\nXlGexhESVcBo9dWOA9u7styPMIHTM18rOLU339uTuICAspeKiXQucXMiLGJxus4iFiRuSvRyREQu\nMmJ9ta+dB92ICZye+WKLFef5I4NLz00/3fJWWn5xPjKOZHg5IiIXlVdfbT/P6DEZ8sXzoJuwE4Oe\n+WLBaXnN
90b9e8jvREdGIzw43GkSFx4cjrb122oQFZELBg+21ho71lHbzzN9++ozGfLF86CbsBOD\nEfhKwWlF8/xxZyStVLH
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5a0f5710>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Przykład dla większej liczby cech\n",
|
|||
|
"fig = plot_data_for_classification(Xpl, Ypl, xlabel=r'$x_1$', ylabel=r'$x_2$')\n",
|
|||
|
"plot_decision_boundary(fig, theta, Xpl)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 2.6. Problem nadmiernego dopasowania"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Obciążenie a wariancja"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Dane do prostego przykładu\n",
|
|||
|
"\n",
|
|||
|
"data = np.matrix([\n",
|
|||
|
" [0.0, 0.0],\n",
|
|||
|
" [0.5, 1.8],\n",
|
|||
|
" [1.0, 4.8],\n",
|
|||
|
" [1.6, 7.2],\n",
|
|||
|
" [2.6, 8.8],\n",
|
|||
|
" [3.0, 9.0],\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"m, n_plus_1 = data.shape\n",
|
|||
|
"n = n_plus_1 - 1\n",
|
|||
|
"Xn1 = data[:, 0:n]\n",
|
|||
|
"Xn1 /= np.amax(Xn1, axis=0)\n",
|
|||
|
"Xn2 = np.power(Xn1, 2) \n",
|
|||
|
"Xn2 /= np.amax(Xn2, axis=0)\n",
|
|||
|
"Xn3 = np.power(Xn1, 3) \n",
|
|||
|
"Xn3 /= np.amax(Xn3, axis=0)\n",
|
|||
|
"Xn4 = np.power(Xn1, 4) \n",
|
|||
|
"Xn4 /= np.amax(Xn4, axis=0)\n",
|
|||
|
"Xn5 = np.power(Xn1, 5) \n",
|
|||
|
"Xn5 /= np.amax(Xn5, axis=0)\n",
|
|||
|
"\n",
|
|||
|
"X1 = np.matrix(np.concatenate((np.ones((m, 1)), Xn1), axis=1)).reshape(m, n + 1)\n",
|
|||
|
"X2 = np.matrix(np.concatenate((np.ones((m, 1)), Xn1, Xn2), axis=1)).reshape(m, 2 * n + 1)\n",
|
|||
|
"X5 = np.matrix(np.concatenate((np.ones((m, 1)), Xn1, Xn2, Xn3, Xn4, Xn5), axis=1)).reshape(m, 5 * n + 1)\n",
|
|||
|
"y = np.matrix(data[:, -1]).reshape(m, 1)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmAAAAFoCAYAAADw0EcgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFJlJREFUeJzt3X+s3fd91/HX23G7rvdebelqZVlb\naMBXLaV/0GGqbr1CVduJzKAGRuFm0tZ28hRAdOvwBCsDUWlCUCY0GGgMorR0QGlvlVUsDMMo7aph\nwaI4WaBNQnUvha6J08VlqLu5DNrsfvjjHM+eZyc3ie/7a9/zeEjWued7ju955+vjk6e/v26NMQIA\nQJ9DUw8AALBoBBgAQDMBBgDQTIABADQTYAAAzQQYAECzfQuwqvpgVT1RVZ+9aNlLquoTVbU5v71x\nv14fAOBatZ9bwD6U5NZLlr03ySfHGKtJPjm/DwCwUGo/L8RaVa9M8vNjjNfO738uyZvGGI9X1c1J\nPj3GeNW+DQAAcA3qPgbspjHG4/Ovv5TkpubXBwCY3OGpXniMMarqipvfquqOJHckydLS0h9+9atf\n3TYbAMBe3H///V8eYxx5tr+vO8B+rapuvmgX5BNXeuIY484kdybJsWPHxpkzZ7pmBADYk6r6wnP5\nfd27IO9J8s751+9M8nPNrw8AMLn9vAzFR5L85ySvqqpHq+pEkvcn+Y6q2kzy1vl9AICFsm+7IMcY\n332Fh96yX68JAHA9cCV8AIBmAgwAoJkAAwBoJsAAAJoJMACAZgIMAKCZAAMAaCbAAACaCTAAgGYC\nDACgmQADAGgmwAAAmgkwAIBmAgwAoJkAAwBoJsAAAJoJMACAZgIMAKCZAAMAaHZ46gEAOIC2t5ON\njWRzM1ldTdbXk5WVqaeCa4YAA+DqOn06OX482d1NdnaSpaXk5Mnk1KlkbW3q6eCaYBckAFfP9vYs\nvra3Z/GVzG7PL3/yyWnng2uEAAPg6tnYmG35upzd3dnjgAAD4Cra3Lyw5etSOzvJ1lbvPHCNEmAA\nXD2rq7Njvi5naSk5erR3HrhGCTCARbS9ndx1V/IjPzK73d6+Ot93fT05dIX/tRw6NHsccBYkwMLZ\nz7MUV1Zm3+fS73/o0Gz58vLV+W+A65wAA1gkF5+leN75Y7aOH0/Onn3+kbS2Nvs+GxuzY76OHp1t\n+RJf8NsEGMAi2ctZiidOPP/XWV6+Ot8HDijHgAEsEmcpwjVBgAEsEmcpwjVBgAEsEmcpwjVBgAEs\nkvNnKa6sXNgStrR0YbkD5aGFg/ABFo2zFGFyAgxgETlLESZlFyQAQDMBBgDQTIABADQTYAAAzQQY\nAEAzAQYA0EyAAQA0E2AAAM0EGABAMwEGANBMgAEANBNgAADNBBgAQLNJAqyq/lJVPVRVn62qj1TV\ni6aYAwBgCu0BVlUvS/KDSY6NMV6b5IYkt3fPAQAwlal2QR5O8vVVdTjJi5OcnWgOAIB27QE2xngs\nyd9N8qtJHk/ylTHGv7/0eVV1R1Wdqaoz586d6x4TAGDfTLEL8sYktyW5Jcm3JFmqqu+59HljjDvH\nGMfGGMeOHDnSPSYAwL6ZYhfkW5P8jzHGuTHG15J8PMm3TzAHAMAkpgiwX03yhqp6cVVVkrckeWSC\nOQAAJjHFMWD3Jrk7yQNJPjOf4c7uOQAApnJ4ihcdY7wvyfumeG0AgKm5Ej4AQDMBBgDQTIABADQT\nYAAAzQQYAEAzAQYA0EyAAQA0E2AAAM0EGABAMwEGANBMgAEANBNgAADNBBgAQLPDUw8AMKnt7WRj\nI9ncTFZXk/X1ZGVl6qmAA06AAYvr9Onk+PFkdzfZ2UmWlpKTJ5NTp5K1tamnAw4wuyCBxbS9PYuv\n7e1ZfCWz2/PLn3xy2vmAA02AAYtpY2O25ety
dndnjwPsEwEGLKbNzQtbvi61s5NsbfXOAywUAQYs\nptXV2TFfl7O0lBw92jsPsFAEGLCY1teTQ1f4CDx0aPY4wD4RYMBiWlmZne24snJhS9jS0oXly8vT\nzgccaC5DASyutbXk7NnZAfdbW7Pdjuvr4gvYdwIMWGzLy8mJE1NPASwYuyABAJoJMACAZgIMAKCZ\nAAMAaCbAAACaCTAAgGYCDACgmQADAGgmwAAAmgkwAIBmAgwAoJkAAwBoJsAAAJoJMACAZgIMAKCZ\nAAMAaCbAAACaCTAAgGYCDACgmQADAGgmwAAAmgkwAIBmAgwAoJkAAwBoNkmAVdU3VtXdVfXfquqR\nqvq2KeYAAJjC4Yle9yeT/Lsxxtur6oVJXjzRHAAA7doDrKq+IckfTfKuJBljfDXJV7vnAACYyhS7\nIG9Jci7JP62qX6mqu6pqaYI5AAAmMUWAHU7yrUl+eozxuiQ7Sd576ZOq6o6qOlNVZ86dO9c9IwDA\nvpkiwB5N8ugY4975/bszC7LfYYxx5xjj2Bjj2JEjR1oHBADYT+0BNsb4UpIvVtWr5ovekuTh7jkA\nAKYy1VmQP5Dkw/MzID+f5PsmmgMAoN0kATbGeDDJsSleGwBgaq6EDwDQTIABADQTYAAAzQQYAEAz\nAQYA0EyAAQA0E2AAAM0EGABAMwEGANBMgAEANBNgAADNBBgAQDMBBgDQTIABADQTYAAAzQQYAEAz\nAQYA0EyAAQA0E2AAAM0EGABAMwEGANBMgAEANBNgAADNBBgAQDMBBgDQ7PDUAwALZns72dhINjeT\n1dVkfT1ZWZl6KoBWAgzoc/p0cvx4srub7OwkS0vJyZPJqVPJ2trU0wG0sQsS6LG9PYuv7e1ZfCWz\n2/PLn3xy2vkAGgkwoMfGxmzL1+Xs7s4eB1gQAgzosbl5YcvXpXZ2kq2t3nkAJiTAgB6rq7Njvi5n\naSk5erR3HoAJCTCgx/p6cugKHzmHDs0eB1gQAgzosbIyO9txZeXClrClpQvLl5ennQ+gkctQAH3W\n1pKzZ2cH3G9tzXY7rq+LL2DhCDCg1/JycuLE1FMATMouSACAZs8YYFX1A1V1Y8cwAACLYC9bwG5K\ncl9Vfayqbq2q2u+hAAAOsmcMsDHGX0+ymuQDSd6VZLOq/lZV/f59ng0A4EDa0zFgY4yR5EvzX08l\nuTHJ3VX14/s4GwDAgfSMZ0FW1XuSvCPJl5PcleQvjzG+VlWHkmwm+Sv7OyIAwMGyl8tQvCTJd40x\nvnDxwjHGblX9if0ZCwDg4HrGABtjvO9pHnvk6o4DAHDwuQ4YAEAzAQYA0EyAAQA0E2AAAM0EGABA\ns8kCrKpuqKpfqaqfn2oGAIApTLkF7D1JXMYCAFg4kwRYVb08yR/P7Mr6AAALZaotYH8/sx9htHul\nJ1TVHVV1pqrOnDt3rm8yAIB91h5g8x9f9MQY4/6ne94Y484xxrExxrEjR440TQcAsP+m2AL2xiRv\nq6r/meSjSd5cVf9igjkAACbRHmBjjL86xnj5GOOVSW5P8qkxxvd0zwEAMBXXAQMAaHZ4yhcfY3w6\nyaennAEAoJstYAAAzQQYAEAzAQYA0EyAAQA0E2AAAM0EGABAMwEGANBMgAEANBNgAADNBBgAQDMB\nBgDQTIABADQTYAAAzQQYAEAzAQYA0EyAAQA0E2AAAM0EGABAMwEGANBMgAEANBNgAADNBBgAQDMB\nBgDQTIABADQTYAAAzQQYAEAzAQYA0EyAAQA0E2AAAM0EGABAMwEGANBMgAEANBNgAADNBBgAQDMB\nBgDQTIABADQTYAAAzQQYAEAzAQYA0EyAAQA0E2AAAM0EGABAMwEGANBMgAEANBNgAADNBBgAQDMB\nBgDQrD3AquoVVfWLVfVwVT1UVe/pngEAYEqHJ3jNp5L88BjjgapaSXJ/VX1ijPHwBLMAALRr3wI2\nxnh8jPHA
/OvtJI8keVn3HAAAU5n0GLCqemWS1yW5d8o5AAA6TRZgVbWc5GeT/NAY4zcu8/gdVXWm\nqs6cO3euf0AAgH0ySYB
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a4e4caed0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data(X1, y, xlabel='x', ylabel='y')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"[<matplotlib.lines.Line2D at 0x7f3a5ac3d1d0>]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmAAAAFoCAYAAADw0EcgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xl8VPW9//H3JzskIWxh3yHsS6LU\npaJWrEvRioqY+Oh+ubW9ty1RrFu1al1qcUP0trc/rt3ubUsiioqKS11aReuCJuxLwr4TtjAJZJ3v\n74+JShEkCZlzZnk9Hw8eSWYmc94cJod3zvIZc84JAAAA3knwOwAAAEC8oYABAAB4jAIGAADgMQoY\nAACAxyhgAAAAHqOAAQAAeCxsBczMfm9mu81s+RG3dTazv5lZWdPHTuFaPgAAQKQK5x6wP0q6+Kjb\nbpH0unMuR9LrTV8DAADEFQvnIFYzGyDpBefc6Kav10j6inNuh5n1lPR359ywsAUAAACIQF6fA9bd\nObej6fOdkrp7vHwAAADfJfm1YOecM7Pj7n4zs2slXStJ6enppw4fPtyzbACAk7dp7yEdrKlXekqi\nBmZnyPwOBITBRx99tMc5l93S7/O6gO0ys55HHILcfbwHOufmSJojSePHj3eLFy/2KiMA4CT95f1N\nuu2Z5eqRlqSXCs9Wn07t/Y4EhIWZbWrN93l9CHKBpO80ff4dSc95vHwAQJiV7w7onhdWSpLuu2IM\n5Qs4hnCOoZgr6Z+ShpnZVjObJulXki4wszJJX236GgAQI2obGvWTuaWqqQ9qyil9dNm4Xn5HAiJS\n2A5BOueuOc5d54drmQAAfz3w8hqt2nFQ/bu01y8mj/I7DhCxmIQPAGgT/1hbod8t2qCkBNPsgjxl\npPp2nRcQ8ShgAICTtqeqVjc8uUSSdP0FQ5Xbt6PPiYDIRgEDAJwU55xunLdEe6pqdfrAzvrhuYP9\njgREPAoYAOCk/O8/N+nNNRXKapesWfm5Skxg4hdwIhQwAECrrd55UPctXCVJuv/KMerVsZ3PiYDo\nQAEDALRKTX2jCueWqq4hqPzxfTVpTE+/IwFRgwIGAGiV+xeu0ppdAQ3qmq47vj7S7zhAVKGAAQBa\n7I3Vu/Snf25ScmJo5EQ6IyeAFqGAAQBaZHegRj+dt1SS9NMLh2lMnyyfEwHRhwIGAGi2YNDphieX\naF91nc4a0kXfP3uQ35GAqEQBAwA02+/f2aC3y/aoU/tkPTw1VwmMnABahQIGAGiWFdsr9cDLayRJ\nM6eMVY+sNJ8TAdGLAgYAOKHDdY2aPrdEdY1BfeP0frpwVA+/IwFRjQIGADihe19cqXUV1RrSLUO3\nX8LICeBkUcAAAF/olRU79Zf3NyslMUGzC3LVLiXR70hA1KOAAQCOa2dljW5+OjRy4qaLh2lUL0ZO\nAG2BAgYAOKZg0GnGk6U6cKhe5wzN1r+dNdDvSEDMoIABAI5pztvr9e66veqSnqKHpo5l5ATQhihg\nAIDPWbr1gB56JTRy4sGpY9Utk5ETQFvizbsAAP+iurZBhUWlagg6fefM/po4vHvLnyQQkIqLpbIy\nKSdHys+XMjPbPiwQpShgAIB/cffzK7VhT7WGdc/UrZNGtPwJFi2SJk2SgkGpulpKT5dmzJAWLpQm\nTGj7wEAU4hAkAOBTC5ftUPHiLUpJStDsa3KVltzCkROBQKh8BQKh8iWFPn5ye1VV24cGohAFDAAg\nSdp+4LBuaRo5cdukERreo0PLn6S4OLTn61iCwdD9AChgAACpMeh0XXGpDtY0aOLwbvr2mf1b90Rl\nZZ/t+TpadbVUXt76kEAMoYABAPTbf6zTBxv2qWtGqh64aqzMWjlyIicndM7XsaSnS0OGtD4kEEM4\nCR8A4tERVymW9B2pR7Z1lSQ9fPU4dc1Ibf3z
5ueHTrg/loSE0P0AKGAAEHeOuEqxqj6own97XI1Z\n0rSBKTp3aPbJPXdmZuhqx6OvgkxICN2ekdE2fwcgylHAACCeHHmVoqQ7Jl2vzVk9NGLXet302zul\nb2w++ZI0YYK0fXtoD1t5eeiwY34+5Qs4AgUMAOLJEVcpPjfiHM0fc77S6mv02PMPKrWhLnT/tGkn\nv5yMjLZ5HiBGUcAAIJ40XaW4pUM33X7RjyRJt7/xO+Xs3RK6n6sUAU9wFSQAxJOcHDVkZOr6r9+g\nQGq6Llj7T32j9KXQfVylCHiGAgYA8SQ/X//1pSla3GeUugX2aubLj+vTgRNcpQh4hkOQABBHPtpX\nr8dOu0rmgpr1+m/U+fBBrlIEfEABA4A4cbCmXoVFpQpK+sGX++msod+Xys/jKkXABxQwAIgTP392\nubbuP6zRvTvohkvGSEnj/I4ExC3OAQOAOPBMyVY9V7pd7ZITNbsgTylJbP4BP/ETCAAxbvPeQ/r5\nsyskSXddNlKDsznUCPiNAgYAMay+MajpRSWqqm3Q10b30NXj+/odCYAoYAAQ0x57vUylWw6oZ1aa\n7r9yjMzsxN8EIOwoYAAQo95fv1e/frNcZtKs/Fx1bJ/idyQATShgABCDKg/V6/riUgWd9J9fGawz\nBnXxOxKAI1DAACDGOOf0s2eXaXtljcb17ajrvjrU70gAjkIBA4AYM++jrXpx6Q6lpyRqdn6ukhPZ\n1AORhp9KAIghG/ZU664FoZETv5g8WgO6pvucCMCxUMAAIEbUNQRVWFSiQ3WN+vq4XppySm+/IwE4\nDgoYAMSIWa+t1dKtlerdsZ3uvXw0IyeACEYBA4AY8O66PfrtP9YpwaRHC3KV1S7Z70gAvgAFDACi\n3P7qOs0oXiLnpB9PzNGXBnT2OxKAE/ClgJnZ9Wa2wsyWm9lcM0vzIwcARDvnnG6Zv1Q7D9bolH4d\nNX3iEL8jAWgGzwuYmfWWNF3SeOfcaEmJkgq8zgEAsaDowy16ZcUuZaYmaXZBnpIYOQFEBb9+UpMk\ntTOzJEntJW33KQcARK3y3VX6xfOhkRP3XjFafTu39zkRgObyvIA557ZJekjSZkk7JFU65149+nFm\ndq2ZLTazxRUVFV7HBICIVtvQqMKiEtXUB3VFXm9NzmXkBBBN/DgE2UnSZEkDJfWSlG5m3zz6cc65\nOc658c658dnZ2V7HBICI9tAra7Ri+0H17dxOd08e5XccAC3kxyHIr0ra4JyrcM7VS5ov6cs+5ACA\nqPR2WYX+5+0NSkwwzS7IU2YaIyeAaONHAdss6Qwza2+hKYHnS1rlQw4AiDp7q2o148klkqTrzs/R\nKf06+ZwIQGv4cQ7Y+5KekvSxpGVNGeZ4nQMAoo1zTjc/vVQVgVqdNqCz/vM8Rk4A0SrJj4U65+6U\ndKcfywaAaPXn9zbptVW7lZmWpFkFuUpM4K2GgGjFwBgAiAJrdwV074uhszXuv3KMends53MiACeD\nAgYAEa6mvlHT55aotiGoqaf20aVje/kdCcBJooABQISb+fJqrd4Z0IAu7XXXZYycAGIBBQwAItib\na3brD+9sVFLTyIn0VF9O3QXQxihgABChKgK1unFeaOTEjAuHalzfjj4nAtBWKGAAEIGcc7rxqSXa\nU1WnMwd10Q/OGex3JABtiAIGABHoj+9u1N/XVCirXbIeyR/HyAkgxlDAACDCrNpxUPcvXC1Jmjll\njHpmMXICiDUUMACIIJ+MnKhrDOqa0/rq4tE9/Y4EIAwoYAAQQe57cZXKdldpUHa6fn7pSL/jAAgT\nChgARIjXVu7S/723ScmJpscK8tQ+hZETQKyigAFABNh9sEY3Pb1UknTTRcM1uneWz4kAhBMFDAB8\nFgw63TBvifZV1+nsnK6aNmGg35EAhBn7twHEt0BAKi6WysqknBwpP1/KzPQ0wu8WbdDbZXvUOT1F\nD08dpwRG
TgAxjwIGIH4tWiRNmiQFg1J1tZSeLs2YIS1cKE2Y4EmE5dsq9cArn4ycGKtuHdI8WS4A\nf3EIEkB8CgRC5SsQCJU
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5ac3d350>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data(X1, y, xlabel='x', ylabel='y')\n",
|
|||
|
"theta_start = np.matrix([0, 0]).reshape(2, 1)\n",
|
|||
|
"theta, _ = gradient_descent(cost, gradient, theta_start, X1, y, eps=0.00001)\n",
|
|||
|
"plot_fun(fig, polynomial_regression(theta), X1)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Ten model ma duże **obciążenie** (**błąd systematyczny**, _bias_) – zachodzi **niedostateczne dopasowanie** (_underfitting_)."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"[<matplotlib.lines.Line2D at 0x7f3a5aac3350>]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmAAAAFoCAYAAADw0EcgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xd81dXh//H3uZlkAAmEAGGEEKYg\nAmGKglvRuhWtixa/WqvVLltr/bXf77etWvut1ipq3bOKqy5wCyKoQED2SiCMsBJWFmTde35/3DBU\n0BCSz7nj9Xw88khy703y5uPN9Z3zOZ9zjLVWAAAA8I7PdQAAAIBoQwEDAADwGAUMAADAYxQwAAAA\nj1HAAAAAPEYBAwAA8FiLFTBjzJPGmBJjzNKDbks3xnxojCloeJ/WUj8fAAAgVLXkCNjTks78xm23\nSfrYWttL0scNnwMAAEQV05ILsRpjsiW9Y60d0PD5KknjrLVbjDGdJM2w1vZpsQAAAAAhyOs5YJnW\n2i0NH2+VlOnxzwcAAHAu1tUPttZaY8xhh9+MMddJuk6SkpOTh/bt29ezbAAAAI0xf/787dbajCP9\nOq8L2DZjTKeDTkGWHO6B1tpHJT0qSXl5eTY/P9+rjAAAAI1ijFnflK/z+hTkW5Kuafj4Gklvevzz\nAQAAnGvJZShelPSFpD7GmGJjzCRJd0s6zRhTIOnUhs8BAACiSoudgrTWXn6Yu05pqZ8JAAAQDlgJ\nHwAAwGMUMAAAAI9RwAAAADxGAQMAAPAYBQwAAMBjFDAAAACPUcAAAAA8RgEDAADwGAUMAADAYxQw\nAAAAj1HAAAAAPEYBAwAA8BgFDAAAwGMUMAAAAI9RwAAAADxGAQMAAPAYBQwAAMBjFDAAAACPxboO\nAACIHtZa1fmtav0B1dYf9Ob3q+Zrn3/945r6gDJbJ2pwt7ZqnRjn+p8BHDUKGACgWW2vrNHMxRs1\n45OF+qpC2hsbr5rYeNX4rer8AVnb9O9tjNQnM1VDu6dpaPc05XVPV9f0VjLGNN8/APAABQwAcFT8\nAauFG3dpxqpSfbq6VIuLyxruaSUZSX5Jfv/+x8f4jOJjfIqP9SkhNvg+Ptan+Jhvfx78OEZxPqOi\nHVVauqlMK7dWaOXWCr0wZ4MkKSM1QUO7pSkvO01DuqdpQOc2io9lhg1CGwUMAHDESiqqNXP1ds1Y\nVaLPCrarbG/d/vvi62s1csMSjVs7X8evX6S0vWVKqK9TfFKi4tcVKaZ1apN/bnWdX0s2lSl/3S7N\nX79L89fvVGlFjd5btlXvLdsqSUqI9WlQl7Ya0j1NeQ0jZWnJ8Uf9bwaaEwUMAPC96v0BfbVxt2as\nKtGMVaVatrn8a/dnt0vSuD4dNLZogUb+8Ra1Ktv17W8SK+mVl6VJk5qcIzEuRsOy0zUsO11ScE5Z\n0fYq5a/fpfnrdmn+hl0qLKnU3HU7NXfdzv1fl5ORrLyGU5bDe6Qru31ykzMAzYECBgA4pG3l1fq0\n4bTiZwWlKq+u339fYpxPo3LaaWzvDI3r0+FAofnts9KhypckVVVJhYXNmtEYo5yMFOVkpOjSvK6S\npF1Vtfpq4y7lr9ul/PW7tGjjbq0trdLa0iq9nF8sSRrXJ0M/OzlXQ7unN2seoLEoYACA/fbU1uvp\nz9fp7UVbtGLL10e5ctona2yfYOEa0SNdiXEx3/4GvXpJycnBsvVNyclSbm4LJT8gLTleJ/fN1Ml9\nMyVJtfUBLd9Svv+U5YxVpfvfRuW0089OydWonHZM5IenjD2ay1E8kpeXZ/Pz813HAIDIUVEhTZki\nFRRIvXqp7uJLNGXlbv3jowJtr6yRJLWKi9Honu00rk+GxvbuoG7tkhr3fbOygu+/KTVV2rxZSklp\n5n/MkdlVVasnZxfp6dnrVFETHNUb2j1NN52c
q3G9MyhiOCLGmPnW2rwj/joKGABEmVmzpPHjpUBA\ntqpK7x17kv428nKtTessSRrUta1uOSVXo3u2P/Qo1xF8f1VVBUe+fD5p2jRpzJhm/sc0XdneOj37\n+To9MbtIu/cELyIYmNVGN52cq9P6Zcrno4jh+1HAAADf76ARqjldjtFdJ/1ICzv3lST12L1Ft046\nRWflZR/9KFBlZXCErbAweNpxwgTnI1+HU1VTr+e/XK/HPlur7ZW1kqS+HVN140m5Gj+wk2IoYvgO\nFDAAwPd7/HGt+t+/66/DL9UnucMlSe0rd+mW2f/WZWtmK+6+e4/qKsVwVl3n10tzN+iRT9dqa3m1\npODVkzeOy9W5x3VWXAxri+HbmlrAmIQPAFFi8+69urfA6LXL75E1PiXX7NF1c1/XtfPeUHJdsHA0\n91WK4SQxLkYTj++hy0d002vzN+mhGYVaW1qlX72ySP/4eLVuGJuri4ZmKSG2CadlgW9gBAwAIlzZ\nnjo9NKNQT32+TrX1AcX663XFwnf1s89fUvs9ZQcemJws3X9/1I6AfVOdP6A3F27WQ9MLtXZ78KrO\nTm0Sdf2JObpseLemzY9DxOEUJADga6rr/Hrm83WaPL1w/xpe5/TP0K23/1DdN6359heEyFWKocYf\nsJq6ZIsmf1KoVduCV3e2T0nQdSf20JUjuyspnpNJ0YwCBgCQFCwMry8o1n0frtbmsuCpxdE92+m2\ns/rq2C5tw+YqxVATCFh9uGKbHvikQEs3BddI694uSZN/OEQDsto4TgdXKGAAEOWstZq+qkR/fXfV\n/pGafp1a67az+urEXu2/fmVjGF2lGGqstZqxulR/fXelVm6tUHyMT7eP76trRjfD1aMIOxQwAIhi\n67ZX6bevLdacouD+h1ltW+nXZ/TWeYOyWM+qhVTX+fWXqSv03JfrJUlnHJOpey4apDZJcY6TwUsU\nMACIUtNXleiWF79SeXW92ibF6aaTcnXlyO5MEvfItCVb9NtXF6uipl5ZbVvpgR8O1pBuaa5jwSNN\nLWAsagIAYcpaq8nTC/Xjp+epvLpep/bL1Ke/PknXnpBD+fLQ+IGdNPXmEzSoSxtt2r1Xlz7yhR6d\nuUaBQOgPcMAdChgAhKHKmnrd8PwC/e39VbJW+sWpvfXoVUM5/eVIt3ZJeuUno3XtmB6qD1jdOW2l\nJj0zTzural1HQ4iigAFAmCnaXqULJs/We8u2KjUhVk9ck6dbTu3FXC/H4mN9uuOc/nr86jy1TYrT\n9FWlOuv+mZqzdofraAhBFDAACCOfrNymcx+cpYKSSuV2SNGbNx2vU/pluo6Fg5zaP1PTbj5Bed3T\ntK28Rpc/9qUe+LhAfk5J4iAUMAAIA4GA1T8/LtCkZ/JVUV2vM47J1Bs3Hq+cDJaOCEWd27bSS9eN\n1I0n9ZSV9PcPV+vqJ+eopKLadTSECAoYAIS4iuo6Xf/8fN374WpJ0q1n9NHDVwxVSgIrsIey2Bif\nbj2jr5750XC1T4nX7MIdGn//Z/qsoNR1NIQAChgAhLDCkkqdP3m2Ply+TamJsXrymmG68aRc5nuF\nkRN7Z2jazSdodM922l5Zq6ufnKv/e3+V6v0B19HgEAUMAELUh8u36fzJs7WmtEq9M1P09k1jdFLf\nDq5joQk6tE7Uc5NG6Ben9paR9OD0Ql3+2JfaUrbXdTQ4QgEDgBATCFjd9+Fq/dez+aqsqdf4gR31\nn58er+z2ya6j4SjE+IxuObWXXrh2pDqkJmjeul0af/9n+mTlNtfR4AAFDABCSHl1na57Ll/3f1wg\nY6TfnNlHk384RMnM94oYo3q207u3nKCxvTO0a0+dfvx0vv4ydTlXSUYZChgAhIjCkgqd/+BsfbSi\nRK0TY/XUxGH66bhcNniOQO1SEvTUxGG67ay+ivEZPfZZkW59dRElLIrwJxUAhID3l23VL6csVFWt\nX307pupf
Vw1V93accoxkPp/RT8b21KAubTXpmXl6fcEmSdLfLh6kGC6yiHhORsCMMb8wxiwzxiw1\nxrxojEl0kQMAXAsErP7
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5aac3210>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data(X2, y, xlabel='x', ylabel='y')\n",
|
|||
|
"theta_start = np.matrix([0, 0, 0]).reshape(3, 1)\n",
|
|||
|
"theta, _ = gradient_descent(cost, gradient, theta_start, X2, y, eps=0.000001)\n",
|
|||
|
"plot_fun(fig, polynomial_regression(theta), X1)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Ten model jest odpowiednio dopasowany."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"[<matplotlib.lines.Line2D at 0x7f3a5aa200d0>]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmAAAAFoCAYAAADw0EcgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3Xd81dXh//H3uZlkQICEsGcCCMgS\nGYJWxUkdrVrROlBR2/7qoNph7bC71bb6rW2dYN2KdQ/qQlFR2SBDRsIIhASSELLnzT2/P25UVEYC\nyefc8Xo+HjyS3HuT+/bj5fLO+ZxzPsZaKwAAAHjH5zoAAABAtKGAAQAAeIwCBgAA4DEKGAAAgMco\nYAAAAB6jgAEAAHis3QqYMeYhY0yRMWbtPrd1Mca8ZYzJaf7Yub2eHwAAIFS15wjYw5LO+Mptt0ia\nb63NljS/+WsAAICoYtpzI1ZjTH9Jr1prRzR/vVHSidbaQmNMD0kLrLVD2i0AAABACPJ6Dlimtbaw\n+fNdkjI9fn4AAADnYl09sbXWGmMOOPxmjLlW0rWSlJycfMzQoUM9ywYACC91jU3KKaqSkZSdmaqE\nWNaYwRvLly8vsdZmtPb7vC5gu40xPfY5BVl0oAdaax+Q9IAkjRs3zi5btsyrjACAMDPjoSWq2lSs\nGZP66bfnjnAdB1HEGJN3ON/n9a8IL0ua0fz5DEkvefz8AIAIszCnRO9tKlZqQqxumJrtOg7QIu25\nDcVTkj6WNMQYk2+MmSnpL5JONcbkSDql+WsAAA5LU8Dqj/PWS5J+cNIgdU1JcJwIaJl2OwVprb34\nAHdNba/nBABElxdW7tT6wgr17JSoqyYPcB0HaDFmKQIAwlJdY5P+/uZGSdLNpw1RYlyM40RAy1HA\nAABhac7CrSosr9OwHh317TG9XMcBWoUCBgAIOyVV9bp3wWZJ0i++eZR8PuM4EdA6FDAAQNi5e36O\nqur9OnFIhiZnpbuOA7QaBQwAEFa2FFfpycXb5TPSz888ynUc4LBQwAAAYeX21zfIH7C6cFwfDeme\n6joOcFgoYACAsLFka6neWLdbHeJidNOpg13HAQ4bBQwAEBastfpT86ar15wwUN06JjpOBBw+ChgA\nICy8tqZQq3aUKT0lQd87YaDrOMARoYABAEJevb9Jt7++QZJ006mDlZzQbhdyATxBAQMAhLzHPs7T\njtJaZXVL0YXjeruOAxwxfoUAAIQsa61eW1Oof7ydI0m6ddpQxcYwdoDwRwEDAISkgrJa/erFtZq/\noUiSdOaI7jppSDfHqYC2QQEDAISUpoDV44vydMfrG1Td0KTUhFjdMm2oLj62r4zhkkOIDBQwAEDI\n2LirUrc8v1ort5dJks4Y3l2/PXe4MtlyAhGGAgYAcK6usUn3vJure9/brMYmq26pCfrduSN0xoju\nrqMB7YICBgBwasnWUt3y/GptKa6WJF0yoa9+duZQdUyMc5wMaD8UMACAE+W1jfrL/zboqSXbJUmD\nMpL15/NGavyALo6TAe2PAgYA8Nzrawv165fWqaiyXnExRj84MUs/PGmQEmJjXEcDPEEBAwC0vcpK\nae5cKSdHys6Wpk+XUlO1u6JOv35prd5Yt1uSNLZvmv5y/kgNzkx1HBjwFgUMANC2Fi6Upk2TAgGp\nulpKTlbgppv15L3P6/ZNflXW+5WSEKufnjFEl07oJ5+PrSUQfShgAIC2U1kZLF+VlZ/flJvYWT8/\n/XotXVMnSTrlqG76/bdGqEenDq5SAs5RwAAAbWfuXCkQkN/4lN8pUy8N+4b+PWm6GmLjlF5dpt8O\nspp2+TQ2VEXUo4ABAA5bTYNfW4qrtbm4SrlFVdqcE6vc6XdoW+eeaoj9YhuJiz55Qz9/9yF1+tH1\nEuULoIABAA7OWqs91Q3BgvVZ0Squ1uaiKu0s
q/3yg02GlBH8tGdFkQYXb9f3Fj+nSTvWSMnJUlaW\n9/8BQAiigAFANNrPKsVAcop2ltUqp6hSuUVfFK3coiqV1zbu98fExRj175qsQRkpyuqWokGpMcq6\n7HwN3LFJyY11X36wzxdcDQmAAgYA0cZ+8IEKp1+mTWm9lJOSqU3rm7Tpw4eU0zNLNU37/57UhFgN\n6tZcsj4rWxnJ6tMlSXExvi8/+JF/Byfix8d8vgpSPp80b56UktL+/4FAGKCAAUCEstaqqLJem3ZX\natPuKuXsrtSmgnLlbNmlysv//fVvaJLSk+M0uHtHZX9WtrqlKCsjRRmpCS2fOD9lilRQEBxhy80N\nnnacPp3yBeyDAgYAESAQsFqWt1efFpRrU1Fz2dp9gFOHCUnqXFOuwSXbNbgkT4NLtiu7ZLsG15So\ny+1/kGbOPPJAKSlt83OACEUBA4Aw1hSwmremUP98J0ebdld97f6OibEa0j1V2ZmpGtwtRYNfflrZ\n/7pD6TVl2u94Vm5uu2cGQAEDgLDkbwro1dXB4rW5uFqS1KNTor4xOCNYtjJTNDgzVd2+eupwfYZk\n9j+hnlWKgHcoYAAQRvxNAb24qkD/fjdXW0uCxatXWgf98KQsnX9Mr0NfzHr6dOmmm/Z/H6sUAc9Q\nwAAgDDT4A3phZb7+/e5mbS+tkST17ZKk607K0rfH9vr6SsQDSU0Nrkb8yrUaWaUIeIsCBgAhrN7f\npGeX5+uedzd/vunpgPRkXXdSls4d3VOxLS1e+2KVIuAcBQwAQlBdY5PmLt2h+97brMLy4IamWd1S\ndP3JWTprZE/F+I7wcj6sUgScooABQAipbWjSk0u26/73Nquosl6SNCQzVddPzdKZI3ocefECEBIo\nYAAQAqrr/XpicZ4eeH+LSqoaJEnDenTUDVOzdNqw7vJRvICIQgEDAIeq6v169ONtmv3BVpVWB4vX\nyN6ddMPJ2Zp6VLeW7z4PIKxQwADAkV3ldfru7EXa0ryP1+g+abrxlGydODiD4gVEOAoYADiws6xW\n331wkfL21Ci7W4p+ffYwTclKp3gBUYICBgAe21Fao4sfXKT8vbUa0aujHrtqgjonx7uOBcBDFDAA\n8NC2kmpd/OAiFZbXaXSfND1y1Xh16hDnOhYAj1HAAMAjuUVV+u6Di1RUWa9x/TrrP1ceq9REyhcQ\njShgAOCBjbsqdcnsRSqpatDEgV00Z8axSk7gLRiIVvztB4B2tq6gXJfOXqy9NY2akpWuBy8fpw7x\nh7hoNoCIRgEDgHa0Or9Ml81ZovLaRp04JEP3XXqMEuMoX0C0o4ABQDtZsX2vZsxZosp6v04dlql/\nfXeMEmIpXwAoYADQLpZuK9UVDy1RdUOTph3dXf+4aIziYnyuYwEIEU7eDYwxPzLGrDPGrDXGPGWM\nSXSRAwDaw0ebS3T5nGD5OmdUT91N+QLwFZ6/Ixhjekm6QdI4a+0ISTGSLvI6BwC0h/c3FevK/yxV\nbWOTzh/bW3dNH61YyheAr3D1rhArqYMxJlZSkqQCRzkAoM28u6FIVz+6TPX+gC4e30d/vWCkYnxc\nWgjA13lewKy1OyX9TdJ2SYWSyq21b371ccaYa40xy4wxy4qLi72OCQCt8sa6Xbr2sWVq8Ad0+aR+\n+uO3jpaP8gXgAFycguws6VxJAyT1lJRsjLn0q4+z1j5grR1nrR2XkZHhdUwAaLHXVhfqh0+sUGOT\n1cwpA/Tbc4ZTvgAclItTkKdI2mqtLbbWNkp6XtJxDnIAwBF7adVOXf/UCvkDVj84cZB++c2jZAzl\nC8DBuShg2yVNNMYkmeC71FRJ6x3kAIAj8uzyfM2au0oBK90wNVs/PX0I5QtAi3i+D5i1drEx5llJ\nKyT5Ja2U9IDXOQDgSDy1ZLtufWGNrJV+fNpgXXdytutIAMKIk41YrbW3SbrNxXMDwJF6d2ORfv78\nGknSrdOG
6toTBjlOBCDcsDkNALRCeU2jbnlutSTpplMHU74AHBYKGAC0wu9e/VS7K+o1tm+afnhS\nlus4AMIUBQwAWujtT3f
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5aa20490>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data(X5, y, xlabel='x', ylabel='y')\n",
|
|||
|
"theta_start = np.matrix([0, 0, 0, 0, 0, 0]).reshape(6, 1)\n",
|
|||
|
"theta, _ = gradient_descent(cost, gradient, theta_start, X5, y, alpha=0.5, eps=10**-7)\n",
|
|||
|
"plot_fun(fig, polynomial_regression(theta), X1)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Ten model ma dużą **wariancję** (_variance_) – zachodzi **nadmierne dopasowanie** (_overfitting_)."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img style=\"margin:auto\" width=\"90%\" src=\"fit.png\"/>"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Obciążenie (błąd systematyczny, _bias_)\n",
|
|||
|
"\n",
|
|||
|
"* Wynika z błędnych założeń co do algorytmu uczącego się.\n",
|
|||
|
"* Duże obciążenie powoduje niedostateczne dopasowanie."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Wariancja (_variance_)\n",
|
|||
|
"\n",
|
|||
|
"* Wynika z nadwrażliwości na niewielkie fluktuacje w zbiorze uczącym.\n",
|
|||
|
"* Wysoka wariancja może spowodować nadmierne dopasowanie (modelując szum zamiast sygnału)."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img style=\"margin:auto\" width=\"60%\" src=\"bias2.png\"/>"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img style=\"margin:auto\" width=\"60%\" src=\"curves.jpg\"/>"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 2.7. Metodologia testowania"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Zbiór uczący a zbiór testowy"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Na zbiorze uczącym (treningowym) uczymy algorytmy, a na zbiorze testowym sprawdzamy ich poprawność.\n",
|
|||
|
"* Zbiór uczący powinien być kilkukrotnie większy od testowego (np. 4:1, 9:1 itp.).\n",
|
|||
|
"* Zbiór testowy często jest nieznany.\n",
|
|||
|
"* Należy unikać mieszania danych testowych i treningowych – nie wolno „zanieczyszczać” danych treningowych danymi testowymi!"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Czasami potrzebujemy dobrać parametry modelu, np. $\\alpha$ – który zbiór wykorzystać do tego celu?"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"collapsed": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Zbiór walidacyjny"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Do doboru parametrów najlepiej użyć jeszcze innego zbioru – jest to tzw. **zbiór walidacyjny**."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
" * Zbiór walidacyjny powinien mieć wielkość zbliżoną do wielkości zbioru testowego, czyli np. dane można podzielić na te trzy zbiory w proporcjach 3:1:1, 8:1:1 itp."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Walidacja krzyżowa"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Którą część danych wydzielić jako zbiór walidacyjny tak, żeby było „najlepiej”?"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
" * Niech każda partia danych pełni tę rolę naprzemiennie!"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img width=\"100%\" src=\"https://chrisjmccormick.files.wordpress.com/2013/07/10_fold_cv.png\"/>\n",
|
|||
|
"Źródło: https://chrisjmccormick.wordpress.com/2013/07/31/k-fold-cross-validation-with-matlab-code/"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Walidacja krzyżowa\n",
|
|||
|
"\n",
|
|||
|
"* Podziel dane $D = \\left\\{ (x^{(1)}, y^{(1)}), \\ldots, (x^{(m)}, y^{(m)})\\right\\} $ na $N$ rozłącznych zbiorów $T_1,\\ldots,T_N$\n",
|
|||
|
"* Dla $i=1,\\ldots,N$, wykonaj:\n",
|
|||
|
" * Użyj zbioru $T_i$ do walidacji, a zbioru $S_i$ do trenowania, gdzie $S_i = D \\smallsetminus T_i$. \n",
|
|||
|
" * Zapisz model $\\theta_i$.\n",
|
|||
|
"* Akumuluj wyniki dla modeli $\\theta_i$ dla zbiorów $T_i$.\n",
|
|||
|
"* Ustalaj parametry uczenia na akumulowanych wynikach."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Walidacja krzyżowa – wskazówki\n",
|
|||
|
"\n",
|
|||
|
"* Zazwyczaj ustala się $N$ w przedziale od $4$ do $10$, tzw. $N$-krotna walidacja krzyżowa (_$N$-fold cross validation_). \n",
|
|||
|
"* Zbiór $D$ warto zrandomizować przed podziałem.\n",
|
|||
|
"* W jaki sposób akumulować wyniki dla wszystkich zbiorów $T_i$?\n",
|
|||
|
"* Po ustaleniu parametrów dla każdego $T_i$, trenujemy model na całych danych treningowych z ustalonymi parametrami.\n",
|
|||
|
"* Testujemy na zbiorze testowym (jeśli nim dysponujemy)."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### _Leave-one-out_\n",
|
|||
|
"\n",
|
|||
|
"Jest to szczególny przypadek walidacji krzyżowej, w której $N = m$."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Jaki jest rozmiar pojedynczego zbioru $T_i$?\n",
|
|||
|
"* Jakie są zalety i wady tej metody?\n",
|
|||
|
"* Kiedy może być przydatna?"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Zbiór walidacyjny a algorytmy optymalizacji\n",
|
|||
|
"\n",
|
|||
|
"* Gdy błąd rośnie na zbiorze uczącym, mamy źle dobrany parametr $\\alpha$. Należy go wtedy zmniejszyć.\n",
|
|||
|
"* Gdy błąd zmniejsza się na zbiorze uczącym, ale rośnie na zbiorze walidacyjnym, mamy do czynienia ze zjawiskiem **nadmiernego dopasowania** (_overfitting_).\n",
|
|||
|
"* Należy wtedy przerwać optymalizację. Automatyzacja tego procesu to _early stopping_."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 2.8. Regularyzacja"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def SGD(h, fJ, fdJ, theta, X, Y, \n",
|
|||
|
" alpha=0.001, maxEpochs=1.0, batchSize=100, \n",
|
|||
|
" adaGrad=False, logError=False, validate=0.0, valStep=100, lamb=0, trainsetsize=1.0):\n",
|
|||
|
" errorsX, errorsY = [], []\n",
|
|||
|
" errorsVX, errorsVY = [], []\n",
|
|||
|
" \n",
|
|||
|
" XT, YT = X, Y\n",
|
|||
|
" \n",
|
|||
|
" m_end=int(trainsetsize*len(X))\n",
|
|||
|
" \n",
|
|||
|
" if validate > 0:\n",
|
|||
|
" mv = int(X.shape[0] * validate)\n",
|
|||
|
" XV, YV = X[:mv], Y[:mv] \n",
|
|||
|
" XT, YT = X[mv:m_end], Y[mv:m_end] \n",
|
|||
|
" m, n = XT.shape\n",
|
|||
|
"\n",
|
|||
|
" start, end = 0, batchSize\n",
|
|||
|
" maxSteps = (m * float(maxEpochs)) / batchSize\n",
|
|||
|
" \n",
|
|||
|
" if adaGrad:\n",
|
|||
|
" hgrad = np.matrix(np.zeros(n)).reshape(n,1)\n",
|
|||
|
" \n",
|
|||
|
" for i in range(int(maxSteps)):\n",
|
|||
|
" XBatch, YBatch = XT[start:end,:], YT[start:end,:]\n",
|
|||
|
"\n",
|
|||
|
" grad = fdJ(h, theta, XBatch, YBatch, lamb=lamb)\n",
|
|||
|
" if adaGrad:\n",
|
|||
|
" hgrad += np.multiply(grad, grad)\n",
|
|||
|
" Gt = 1.0 / (10**-7 + np.sqrt(hgrad))\n",
|
|||
|
" theta = theta - np.multiply(alpha * Gt, grad)\n",
|
|||
|
" else:\n",
|
|||
|
" theta = theta - alpha * grad\n",
|
|||
|
" \n",
|
|||
|
" if logError:\n",
|
|||
|
" errorsX.append(float(i*batchSize)/m)\n",
|
|||
|
" errorsY.append(fJ(h, theta, XBatch, YBatch).item())\n",
|
|||
|
" if validate > 0 and i % valStep == 0:\n",
|
|||
|
" errorsVX.append(float(i*batchSize)/m)\n",
|
|||
|
" errorsVY.append(fJ(h, theta, XV, YV).item())\n",
|
|||
|
" \n",
|
|||
|
" if start + batchSize < m:\n",
|
|||
|
" start += batchSize\n",
|
|||
|
" else:\n",
|
|||
|
" start = 0\n",
|
|||
|
" end = min(start + batchSize, m)\n",
|
|||
|
" return theta, (errorsX, errorsY, errorsVX, errorsVY)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Przygotowanie danych do przykładu regularyzacji\n",
|
|||
|
"\n",
|
|||
|
"n = 6\n",
|
|||
|
"\n",
|
|||
|
"data = np.matrix(np.loadtxt(\"ex2data2.txt\", delimiter=\",\"))\n",
|
|||
|
"np.random.shuffle(data)\n",
|
|||
|
"\n",
|
|||
|
"X = powerme(data[:,0], data[:,1], n)\n",
|
|||
|
"Y = data[:,2]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def draw_regularization_example(X, Y, lamb=0, alpha=1, adaGrad=True, maxEpochs=2500, validate=0.25):\n",
|
|||
|
" plt.figure(figsize=(16,8))\n",
|
|||
|
" plt.subplot(121)\n",
|
|||
|
" plt.scatter(X[:, 2].tolist(), X[:, 1].tolist(),\n",
|
|||
|
" c=Y.tolist(),\n",
|
|||
|
" s=100, cmap=plt.cm.get_cmap('prism'));\n",
|
|||
|
"\n",
|
|||
|
" theta = np.matrix(np.zeros(X.shape[1])).reshape(X.shape[1],1)\n",
|
|||
|
" thetaBest, err = SGD(h, J, dJ, theta, X, Y, alpha=alpha, adaGrad=adaGrad, maxEpochs=maxEpochs, batchSize=100, \n",
|
|||
|
" logError=True, validate=validate, valStep=1, lamb=lamb)\n",
|
|||
|
"\n",
|
|||
|
" xx, yy = np.meshgrid(np.arange(-1.5, 1.5, 0.02),\n",
|
|||
|
" np.arange(-1.5, 1.5, 0.02))\n",
|
|||
|
" l = len(xx.ravel())\n",
|
|||
|
" C = powerme(xx.reshape(l, 1),yy.reshape(l, 1), n)\n",
|
|||
|
" z = classifyBi(thetaBest, C).reshape(int(np.sqrt(l)), int(np.sqrt(l)))\n",
|
|||
|
"\n",
|
|||
|
" plt.contour(xx, yy, z, levels=[0.5], lw=3);\n",
|
|||
|
" plt.ylim(-1,1.2);\n",
|
|||
|
" plt.xlim(-1,1.2);\n",
|
|||
|
" plt.legend();\n",
|
|||
|
" plt.subplot(122)\n",
|
|||
|
" plt.plot(err[0],err[1], lw=3, label=\"Training error\")\n",
|
|||
|
" if validate > 0:\n",
|
|||
|
" plt.plot(err[2],err[3], lw=3, label=\"Validation error\");\n",
|
|||
|
" plt.legend()\n",
|
|||
|
" plt.ylim(0.2,0.8);"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"/home/pawel/.local/lib/python2.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: overflow encountered in exp\n",
|
|||
|
" \n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA7QAAAHWCAYAAABHZMXWAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3XmczdUfx/HXd2bu7IOxRNmzxNgZ\nhOxLKEQoWfIrlJAohXalaJGsRcqSSEjKGolkH/sSylKW7IzZt+/vjy/TMNudmTtzZ8b7+XjMo5nv\n93zP+dxBdz5zzvkcwzRNRERERERERHIaF2cHICIiIiIiIpIeSmhFREREREQkR1JCKyIiIiIiIjmS\nEloRERERERHJkZTQioiIiIiISI6khFZERERERERyJCW0IiIidxjDMFobhnHYMIw/DcMYnsT9EoZh\nrDMMY5dhGHsNw2jrjDhFRERSY+gcWhERkTuHYRiuwBGgJXAK2A50M03zYII204BdpmlONQwjAFhu\nmmYpZ8QrIiKSEs3QioiI3FnqAH+apnnMNM0oYD7Q4bY2JpDnxud5gTNZGJ+IiIjd3JwdgIiIiGSp\nosA/Cb4+BdS9rc1bwGrDMAYBPkCLrAlNREQkbXJkQluwYEGzVKlSzg5DMklMVAyXzlwhNDiMmOjY\n+Os2d1eKlLoLLz8vJ0YnIrlRUFDQRdM0Czk7jmykGzDTNM2PDcOoB8wxDKOyaZpxCRsZhtEP6Afg\n4+NTq0KFCk4IVUREciN735tzZEJbqlQpduzY4ewwJJOZpsmJA/8QtHoPodfC6Dz0YXzy+jg7LBHJ\nhQzDOOnsGLLQaaB4gq+L3biW0NNAawDTNDcbhuEJFATOJ2xkmuY0YBpAYGCgqfdmERFxFHvfm3Nk\nQit3BsMwKF25BKUrl3B2KCIiucl2oJxhGKWxEtnHgSdua/M30ByYaRhGRcATuJClUYqIiNhBRaFE\nRETuIKZpxgADgVXAIWCBaZoHDMMYZRhG+xvNXgT6GoaxB5gH9DZ1LIKIiGRDmqEVERG5w5imuRxY\nftu1NxJ8fhBokNVxiYiIpJUSWhERERERyZGio6M5deoUERERzg5F0snT05NixYphs9nS9bwSWhER\nERERyZFOnTqFn58fpUqVwjAMZ4cjaWSaJpcuXeLUqVOULl06XX1oD62IiIiIiORIERERFChQQMls\nDmUYBgUKFMjQDLsSWhERERERybGUzOZsGf3zU0IrIiIiIiKSDpcuXaJ69epUr16dIkWKULRo0fiv\no6Ki7Orjf//7H4cPH06xzeTJk5k7d64jQs51tIdWREREREQkHQoUKMDu3bsBeOutt/D19eWll166\npY1pmpimiYtL0nOJX331VarjDBgwIOPBpkFMTAxubm7Jfm3vc1lBM7QiIiIiIiIO9OeffxIQEED3\n7t2pVKkSZ8+epV+/fgQGBlKpUiVGjRoV3/aBBx5g9+7dxMTEkC9fPoYPH061atWoV68e58+fB+C1\n115j/Pjx8e2HDx9OnTp1uO+++9i0aRMAoaGhPProowQEBNC5c2cCAwPjk+2Etm/fTuPGjalVqxZt\n2rTh3Llz8f0OGTKEwMBAJk2aRI8ePejfvz916tRh5MiRXLx4kfbt21O1alXq16/P/v3742Pr1asX\nDRo0oHfv3pn5bU2SZmhFHMU0Yft2WLIEgoOhbFno3h0KFXJ2ZCIiIiK5XqnhyzKt7xNjHkrzM3/8\n8QezZ88mMDAQgDFjxpA/f35iYmJo2rQpnTt3JiAg4JZnrl27RuPGjRkzZgxDhw7lyy+/ZPjw4Yn6\nNk2Tbdu2sXTpUkaNGsXKlSuZOHEiRYoUYdGiRezZs4eaNWsmei4yMpLBgwezdOlSChYsyNy5c3n9\n9deZNm0aALGxsezYsQOAHj16cPbsWbZs2YKL
iwv9+/enbt26LF26lNWrV9O7d+/4tn/88QcbNmzA\n09Mzzd+njFJCK+IIx45B+/Zw4gSEhVnJrZcXDB8OffvC+PHg6ursKEVEREQki5QpUyY+mQWYN28e\nM2bMICYmhjNnznDw4MFECa2Xlxdt2rQBoFatWvz2229J9t2pU6f4NidOnABg48aNvPLKKwBUq1aN\nSpUqJXru0KFDHDhwgBYtWgBWAlusWLH4+4899tgt7bt06RK/VHrjxo0sW2b90qBVq1b07t2b0NBQ\nADp06OCUZBaU0Ipk3JkzULcuXL4McXH/XQ8Pt/775ZfWjO2sWc6JT0RERESynI+PT/znR48e5dNP\nP2Xbtm3ky5ePHj16JHlUjbu7e/znrq6uxMTEJNm3h4dHqm2SYpomVatWTTZRThhzUl8nx952mUEJ\nrUhGvfkmXL16azKbUFgYLFwIQ4dCtWpZG5uIiIjIHSI9y4KzSnBwMH5+fuTJk4ezZ8+yatUqWrdu\n7dAxGjRowIIFC2jYsCH79u3j4MGDidoEBARw+vRptm3bRp06dYiKiuLo0aNJzubermHDhsydO5cR\nI0awZs0aihYt6tRE9iYltCIZERoK33wDqf1mLDISxo2782ZpY2Nh/37r+1S8uPUhIiIicoepWbMm\nAQEBVKhQgZIlS9KgQQOHjzFo0CB69epFQEBA/EfevHlvaePh4cHChQt5/vnnCQ4OJjY2lhdffNGu\nhHbUqFE89dRTVK1aFV9fX7uqM2cFwzRNZ8eQZoGBgebNDcgiTnXgANSrB9evp962YkVI4jdluVJM\njJXAf/SRtfTa1dVK6mvUgLFjoWFDZ0cocgvDMIJM0wxMvaUkR+/NIuIMhw4domLFis4OI1uIiYkh\nJiYGT09Pjh49SqtWrTh69GiWH6OTHkn9Odr73pz9X51IdpbMeWIZbpuTxcZaBbLWr7eWWye0eTM8\n+CDMng2dOzsnPhEREZFcKCQkhObNmxMTE4Npmnz++ec5IpnNqNz/CkUyU5kyYBipt7PZ4EY1uVxv\n0qSkk9mbwsOhVy9o3FhHGomIiIg4SL58+QgKCnJ2GFnuDpkyEskk7u7w7LNwo9Jcslxd4fnnsyYm\nZzJN+OCD5JPZhKZPz/x4RERERCRXU0IrklGvvQYlSljJbVK8vWHkSLj33qyNyxn++suq+Jya8HD4\n9tvMj0dEREREcjUltCIZ5ecH27bBww+Dpyf4+oKXl3Xd398qjPT6686OMmtERFiz0fa2FRERERHJ\nAO2hFXGEfPlg0SI4dw5WrbKOqSlZElq1gjtgM368okUhKsq+tuXLZ24sIiIiIpLraYZWxJEKF7YK\nHvXvD23b3lnJLFgz0g8+mHqhLF9fGDw4a2ISERERySRNmzZl1apVt1wbP348/fv3T/E5X19fAM6c\nOUPnZE5+aNKkCakdhzZ+/HjCEtQuadu2LVft2f6ViyihFRHHGj3aWnKdHA8PqFQJmjXLuphERERE\nMkG3bt2YP3/+Ldfmz59Pt27d7Hr+nnvuYeHCheke//aEdvny5eTLly/d/aVFTExMil/b+1xGKaEV\nEceqXBlWrIA8ecDH57/rhmF9XauWtSz7TjmXV0RERHKtzp07s2zZMqJubLk6ceIEZ86coWHDhvHn\nwtasWZMqVarwww8/JHr+xIkTVK5cGYDw8HAef/xxKlasSMeOHQkPD49v179/fwIDA6lUqRJvvvkm\nABMmTODMmTM0bdqUpk2bAlCqVCkuXrwIwLhx46hcuTKVK1dm/Pjx8eNVrFiRvn37UqlSJVq1anXL\nODdduHCBRx99lNq1a1O7dm1+//13AN566y169uxJgwYN6NmzJzNnzqR9+/Y0a9aM5s2bY5omw4YN\no3LlylSpUoVvbxQB/fXXX2nYsCHt27cnICDAId/7m+6w9ZAikiUaNYIzZ+Cbb+DrryEkBMqVg0GD\noH59+87u
FREREUmLt/JmYt/XkrycP39+6tSpw4oVK+jQoQPz58+na9euGIaBp6cn33//PXny5OHi\nxYvcf//9tG/fHiOZn4O
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5a60a890>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"draw_regularization_example(X, Y)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Regularyzacja\n",
|
|||
|
"\n",
|
|||
|
"* Metoda zapobiegania zjawisku nadmiernego dopasowania (*overfitting*)\n",
|
|||
|
"* „Kara” za ekstremalne wartości parametrów $\\theta$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Regularyzacja dla regresji liniowej – funkcja kosztu\n",
|
|||
|
"\n",
|
|||
|
"$$\n",
|
|||
|
"J(\\theta) \\, = \\, \\dfrac{1}{2m} \\left( \\displaystyle\\sum_{i=1}^{m} \\left( h_\\theta(x^{(i)}) - y^{(i)} \\right)^2 \\color{red}{ + \\lambda \\displaystyle\\sum_{j=1}^{n} \\theta^2_j } \\right)\n",
|
|||
|
"$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* $\\lambda$ – parametr regularyzacji\n",
|
|||
|
"* jeżeli $\\lambda$ jest zbyt mały, skutkuje to nadmiernym dopasowaniem\n",
|
|||
|
"* jeżeli $\\lambda$ jest zbyt duży, skutkuje to niedostatecznym dopasowaniem"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Regularyzacja dla regresji liniowej – gradient\n",
|
|||
|
"\n",
|
|||
|
"$$\\small\n",
|
|||
|
"\\begin{array}{llll}\n",
|
|||
|
"\\dfrac{\\partial J(\\theta)}{\\partial \\theta_0} &=& \\dfrac{1}{m}\\displaystyle\\sum_{i=1}^m \\left( h_{\\theta}(x^{(i)})-y^{(i)} \\right) x^{(i)}_0 & \\textrm{dla $j = 0$ }\\\\\n",
|
|||
|
"\\dfrac{\\partial J(\\theta)}{\\partial \\theta_j} &=& \\dfrac{1}{m}\\displaystyle\\sum_{i=1}^m \\left( h_{\\theta}(x^{(i)})-y^{(i)} \\right) x^{(i)}_j \\color{red}{+ \\dfrac{\\lambda}{m}\\theta_j} & \\textrm{dla $j = 1, 2, \\ldots, n $} \\\\\n",
|
|||
|
"\\end{array} \n",
|
|||
|
"$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Regularyzacja dla regresji logistycznej – funkcja kosztu\n",
|
|||
|
"\n",
|
|||
|
"$$\n",
|
|||
|
"\\begin{array}{rcl}\n",
|
|||
|
"J(\\theta) & = & -\\dfrac{1}{m} \\left( \\displaystyle\\sum_{i=1}^{m} y^{(i)} \\log h_\\theta(x^{(i)}) + \\left( 1-y^{(i)} \\right) \\log \\left( 1-h_\\theta(x^{(i)}) \\right) \\right) \\\\\n",
|
|||
|
"& & \\color{red}{ + \\dfrac{\\lambda}{2m} \\displaystyle\\sum_{j=1}^{n} \\theta^2_j } \\\\\n",
|
|||
|
"\\end{array}\n",
|
|||
|
"$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Regularyzacja dla regresji logistycznej – gradient\n",
|
|||
|
"\n",
|
|||
|
"$$\\small\n",
|
|||
|
"\\begin{array}{llll}\n",
|
|||
|
"\\dfrac{\\partial J(\\theta)}{\\partial \\theta_0} &=& \\dfrac{1}{m}\\displaystyle\\sum_{i=1}^m \\left( h_{\\theta}(x^{(i)})-y^{(i)} \\right) x^{(i)}_0 & \\textrm{dla $j = 0$ }\\\\\n",
|
|||
|
"\\dfrac{\\partial J(\\theta)}{\\partial \\theta_j} &=& \\dfrac{1}{m}\\displaystyle\\sum_{i=1}^m \\left( h_{\\theta}(x^{(i)})-y^{(i)} \\right) x^{(i)}_j \\color{red}{+ \\dfrac{\\lambda}{m}\\theta_j} & \\textrm{dla $j = 1, 2, \\ldots, n $} \\\\\n",
|
|||
|
"\\end{array} \n",
|
|||
|
"$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Implementacja metody regularyzacji"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def J_(h,theta,X,y,lamb=0):\n",
|
|||
|
" m = float(len(y))\n",
|
|||
|
" f = h(theta, X, eps=10**-7)\n",
|
|||
|
" j = 1.0/m \\\n",
|
|||
|
" * -np.sum(np.multiply(y, np.log(f)) + \n",
|
|||
|
" np.multiply(1 - y, np.log(1 - f)), axis=0) \\\n",
|
|||
|
" + lamb/(2*m) * np.sum(np.power(theta[1:] ,2))\n",
|
|||
|
" return j\n",
|
|||
|
"\n",
|
|||
|
"def dJ_(h,theta,X,y,lamb=0):\n",
|
|||
|
" m = float(y.shape[0])\n",
|
|||
|
" g = 1.0/y.shape[0]*(X.T*(h(theta,X)-y))\n",
|
|||
|
" g[1:] += lamb/m * theta[1:]\n",
|
|||
|
" return g"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"slider_lambda = widgets.FloatSlider(min=0.0, max=0.5, step=0.005, value=0.01, description=r'$\\lambda$', width=300)\n",
|
|||
|
"\n",
|
|||
|
"def slide_regularization_example_2(lamb):\n",
|
|||
|
" draw_regularization_example(X, Y, lamb=lamb)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 38,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"application/vnd.jupyter.widget-view+json": {
|
|||
|
"model_id": "0738c69d0fff4e32ab23d4aa93ca9a71",
|
|||
|
"version_major": 2,
|
|||
|
"version_minor": 0
|
|||
|
},
|
|||
|
"text/html": [
|
|||
|
"<p>Failed to display Jupyter Widget of type <code>interactive</code>.</p>\n",
|
|||
|
"<p>\n",
|
|||
|
" If you're reading this message in Jupyter Notebook or JupyterLab, it may mean\n",
|
|||
|
" that the widgets JavaScript is still loading. If this message persists, it\n",
|
|||
|
" likely means that the widgets JavaScript library is either not installed or\n",
|
|||
|
" not enabled. See the <a href=\"https://ipywidgets.readthedocs.io/en/stable/user_install.html\">Jupyter\n",
|
|||
|
" Widgets Documentation</a> for setup instructions.\n",
|
|||
|
"</p>\n",
|
|||
|
"<p>\n",
|
|||
|
" If you're reading this message in another notebook frontend (for example, a static\n",
|
|||
|
" rendering on GitHub or <a href=\"https://nbviewer.jupyter.org/\">NBViewer</a>),\n",
|
|||
|
" it may mean that your frontend doesn't currently support widgets.\n",
|
|||
|
"</p>\n"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
"interactive(children=(FloatSlider(value=0.01, description=u'$\\\\lambda$', max=0.5, step=0.005), Button(description=u'Run Interact', style=ButtonStyle()), Output()), _dom_classes=('widget-interact',))"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<function __main__.slide_regularization_example_2>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 38,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"widgets.interact_manual(slide_regularization_example_2, lamb=slider_lambda)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def cost_lambda_fun(lamb):\n",
|
|||
|
" theta = np.matrix(np.zeros(X.shape[1])).reshape(X.shape[1],1)\n",
|
|||
|
" thetaBest, err = SGD(h, J, dJ, theta, X, Y, alpha=1, adaGrad=True, maxEpochs=2500, batchSize=100, \n",
|
|||
|
" logError=True, validate=0.25, valStep=1, lamb=lamb)\n",
|
|||
|
" return err[1][-1], err[3][-1]\n",
|
|||
|
"\n",
|
|||
|
"def plot_cost_lambda():\n",
|
|||
|
" plt.figure(figsize=(16,8))\n",
|
|||
|
" ax = plt.subplot(111)\n",
|
|||
|
" Lambda = np.arange(0.0, 1.0, 0.01)\n",
|
|||
|
" Costs = [cost_lambda_fun(lamb) for lamb in Lambda]\n",
|
|||
|
" CostTrain = [cost[0] for cost in Costs]\n",
|
|||
|
" CostCV = [cost[1] for cost in Costs]\n",
|
|||
|
" plt.plot(Lambda, CostTrain, lw=3, label='training error')\n",
|
|||
|
" plt.plot(Lambda, CostCV, lw=3, label='validation error')\n",
|
|||
|
" ax.set_xlabel(r'$\\lambda$')\n",
|
|||
|
" ax.set_ylabel(u'cost')\n",
|
|||
|
" plt.legend()\n",
|
|||
|
" plt.ylim(0.2,0.8)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA7MAAAHmCAYAAAChwtkCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xl4XfVh5//PV+vVrqur1ZJsyxuW\nN8CYrWY326SNU5IAYRoeaBuYpMnQ5JnSkHYG0vzaTppmmIRMlpIFOvwSUpbSkJYESgMhECBewA5e\n8L5IsrXv29W99zt/nHMXybItyfdKOtL79Tx6zqJz7/2KVW9/z2KstQIAAAAAwEvSZnoAAAAAAABM\nFjELAAAAAPAcYhYAAAAA4DnELAAAAADAc4hZAAAAAIDnELMAAAAAAM9JacwaY242xrxvjDlgjHlg\nnO8vNMa8Yox5xxiz0xjzgVSOBwAAAAAwN5hUPWfWGJMuaZ+kGyQ1SNoi6Q5r7e6EYx6V9I619tvG\nmFWSXrDWLk7JgAAAAAAAc0YqZ2YvkXTAWnvIWhuU9GNJHxpzjJVU6K4XSWpK4XgAAAAAAHNERgrf\nu1rS8YTtBkmXjjnmi5JeMsb8V0l5kq5P4XgAAAAAAHNEKmN2Iu6Q9Li19n8ZYy6X9IQxZo21NpJ4\nkDHmXkn3SlJeXt5FK1eunIGhAgAAAABSbdu2bW3W2rKzHZfKmG2UVJuwXePuS/THkm6WJGvtm8YY\nn6RSSS2JB1lrH5X0qCRt2LDBbt26NVVjBgAAAADMIGPM0Ykcl8prZrdIWm6MqTPGZEn6mKTnxxxz\nTNImSTLG1EvySWpN4ZgAAAAAAHNAymLWWhuS9BlJL0raI+kpa+0uY8yXjDGb3cP+m6R7jDE7JD0p\n6W6bqtsrAwAAAADmjJReM2utfUHSC2P2PZiwvlvSxlSOAQAAAAAw98z0DaAAAAAAYEpGRkbU0NCg\noaGhmR4KpsDn86mmpkaZmZlTej0xCwAAAMCTGhoaVFBQoMWLF8sYM9PDwSRYa9Xe3q6GhgbV1dVN\n6T1SeQMoAAAAAEiZoaEhBQIBQtaDjDEKBALnNKtOzAIAAADwLELWu8717x0xCwAAAABT0NXVpW99\n61tTeu0HPvABdXV1nfGYBx98UC+//PKU3n8+IGYBAAAAYArOFLOhUOiMr33hhRdUXFx8xmO+9KUv\n6frrr5/y+CYrHA6P2j7bzzDZ45KNmAUAAACAKXjggQd08OBBXXDBBbr//vv16quv6sorr9TmzZu1\natUqSdLv//7v66KLLtLq1av16KOPxl67ePFitbW16ciRI6qvr9c999yj1atX68Ybb9Tg4KAk6e67\n79YzzzwTO/6hhx7S+vXrtXbtWu3du1eS1NraqhtuuEGrV6/WJz7xCS1atEhtbW2njPWll17S5Zdf\nrvXr1+vWW29VX19f7H0///nPa/369Xr66ad1zTXX6LOf/aw2bNigr3/96zpy5Iiuu+46rVu3Tps2\nbdKxY8diY/vkJz+pSy+9VH/+53+eur/IZ8DdjAEAAAB43uIH/i1l733ky7877v4vf/nLeu+99/Tu\nu+9Kkl599VVt375d7733XuwOvT/4wQ9UUlKiwcFBXXzxxfrIRz6iQCAw6n3279+vJ598Ut/97nd1\n22236dlnn9XHP/7xUz6vtLRU27dv17e+9S199atf1fe+9z391V/9la677jp94Qtf0M9//nN9//vf\nP+V1bW1t+uu//mu9/PLLysvL09/93d/p4Ycf1oMPPihJCgQC2r59uyTpO9/5joLBoLZu3SpJ+uAH\nP6i77rpLd911l37wgx/ovvvu07/8y79Icu4m/etf/1rp6elT+ct6zohZAAAAAEiSSy65ZNSjZh55\n5BE999xzkqTjx49r//79p8RsXV2dLrjgAknS
RRddpCNHjoz73h/+8Idjx/zzP/+zJOn111+Pvf/N\nN98sv99/yuveeust7d69Wxs3bpQkBYNBXX755bHv33777aOOT9x+8803Y5915513jpqFvfXWW2cs\nZCViFgAAAACSJi8vL7b+6quv6uWXX9abb76p3NxcXXPNNeM+iiY7Ozu2np6eHjvN+HTHpaenT+o6\nVWutbrjhBj355JNnHfN426cz0eNShZgFAAAA4HmnOxU4lQoKCtTb23va73d3d8vv9ys3N1d79+7V\nW2+9lfQxbNy4UU899ZQ+//nP66WXXlJnZ+cpx1x22WX69Kc/rQMHDmjZsmXq7+9XY2OjVqxYcdb3\n/53f+R39+Mc/1p133qkf/vCHuvLKK5P+M0wVN4ACAAAAgCkIBALauHGj1qxZo/vvv/+U7998880K\nhUKqr6/XAw88oMsuuyzpY3jooYf00ksvac2aNXr66adVWVmpgoKCUceUlZXp8ccf1x133KF169bp\n8ssvj91A6my+8Y1v6LHHHtO6dev0xBNP6Otf/3rSf4apMtbamR7DpGzYsMFGL0YGAAAAMH/t2bNH\n9fX1Mz2MGTU8PKz09HRlZGTozTff1Kc+9anYDam8YLy/h8aYbdbaDWd7LacZAwAAAIBHHTt2TLfd\ndpsikYiysrL03e9+d6aHNG2IWQAAAADwqOXLl+udd96Z6WHMCK6ZBQAAAAB4DjELAAAAAPAcYhYA\nAAAA4DnELAAAAADAc4hZAAAAAJgm+fn5kqSmpiZ99KMfHfeYa665Rmd7HOnXvvY1DQwMxLY/8IEP\nqKurK3kD9QBiFgAAAACm2YIFC/TMM89M+fVjY/aFF15QcXFxMoZ2VqFQ6IzbE33duSJmAQAAAGAK\nHnjgAX3zm9+MbX/xi1/UV7/6VfX19WnTpk1av3691q5dq5/85CenvPbIkSNas2aNJGlwcFAf+9jH\nVF9fr1tuuUWDg4Ox4z71qU9pw4YNWr16tR566CFJ0iOPPKKmpiZde+21uvbaayVJixcvVltbmyTp\n4Ycf1po1a7RmzRp97Wtfi31efX297rnnHq1evVo33njjqM+Jam1t1Uc+8hFdfPHFuvjii/XGG2/E\nfrY777xTGzdu1J133qnHH39cmzdv1nXXXadNmzbJWqv7779fa9as0dq1a/VP//RPkqRXX31VV155\npTZv3qxVq1ad81/zRDxnFgAAAID3fbEohe/dPe7u22+/XZ/97Gf16U9/WpL01FNP6cUXX5TP59Nz\nzz2nwsJCtbW16bLLLtPmzZtljBn3fb797W8rNzdXe/bs0c6dO7V+/frY9/7mb/5GJSUlCofD2rRp\nk3bu3Kn77rtPDz/8sF555RWVlpaOeq9t27bpscce09tvvy1rrS699FJdffXV8vv92r9/v5588kl9\n97vf1W233aZnn31WH//4x0e9/k//9E/1uc99TldccYWOHTumm266SXv27JEk7d69W6+//rpycnL0\n+OOPa/v27dq5c6dKSkr07LPP6t1339WOHTvU1tamiy++WFdddZUkafv27XrvvfdUV1c3tb/+p0HM\nAgAAAMAUXHjhhWppaVFTU5NaW1vl9/tVW1urkZER/cVf/IVee+01paWlqbGxUc3NzaqsrBz3fV57\n7TXdd999kqR169Zp3bp1se899dRTevTRRxUKhXTixAnt3r171PfHev3113XLLbcoLy9PkvThD39Y\nv/rVr7R582bV1dXpggsukCRddNFFOnLkyCmvf/nll7V79+7Ydk9Pj/r6+iRJmzdvVk5OTux7N9xw\ng0pKSmKfe8cddyg9PV0VFRW6+uqrtWXLFhUWFuqSSy5JeshKxCwAAAAATNmtt96qZ555RidPntTt\nt98uSfrhD3+o1tZWbdu2TZmZmVq8eLGGhoYm/d6HDx/WV7/6VW3ZskV+v1933333lN4nKjs7O7ae\nnp4+7mnG
kUhEb731lnw+3ynfiwby6bZPZ6LHTRYxCwAAAMD7TnMqcKrdfvvtuueee9TW1qZf/vKX\nkqTu7m6Vl5crMzNTr7z
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a61e722d0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot_cost_lambda()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 2.9. Krzywa uczenia się"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Krzywa uczenia pozwala sprawdzić, czy uczenie przebiega poprawnie.\n",
|
|||
|
"* Krzywa uczenia to wykres zależności między wielkością zbioru treningowego a wartością funkcji kosztu.\n",
|
|||
|
"* Wraz ze wzrostem wielkości zbioru treningowego wartość funkcji kosztu na zbiorze treningowym rośnie.\n",
|
|||
|
"* Wraz ze wzrostem wielkości zbioru treningowego wartość funkcji kosztu na zbiorze walidacyjnym maleje."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def cost_trainsetsize_fun(m):\n",
|
|||
|
" theta = np.matrix(np.zeros(X.shape[1])).reshape(X.shape[1],1)\n",
|
|||
|
" thetaBest, err = SGD(h, J, dJ, theta, X, Y, alpha=1, adaGrad=True, maxEpochs=2500, batchSize=100, \n",
|
|||
|
" logError=True, validate=0.25, valStep=1, lamb=0.01, trainsetsize=m)\n",
|
|||
|
" return err[1][-1], err[3][-1]\n",
|
|||
|
"\n",
|
|||
|
"def plot_learning_curve():\n",
|
|||
|
" plt.figure(figsize=(16,8))\n",
|
|||
|
" ax = plt.subplot(111)\n",
|
|||
|
" M = np.arange(0.3, 1.0, 0.05)\n",
|
|||
|
" Costs = [cost_trainsetsize_fun(m) for m in M]\n",
|
|||
|
" CostTrain = [cost[0] for cost in Costs]\n",
|
|||
|
" CostCV = [cost[1] for cost in Costs]\n",
|
|||
|
" plt.plot(M, CostTrain, lw=3, label='training error')\n",
|
|||
|
" plt.plot(M, CostCV, lw=3, label='validation error')\n",
|
|||
|
" ax.set_xlabel(u'trainset size')\n",
|
|||
|
" ax.set_ylabel(u'cost')\n",
|
|||
|
" plt.legend()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Krzywa uczenia a obciążenie i wariancja\n",
|
|||
|
"\n",
|
|||
|
"<img width=\"100%\" src=\"learning-curves.png\"/>\n",
|
|||
|
"\n",
|
|||
|
"Źródło: http://www.ritchieng.com/machinelearning-learning-curve"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 42,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA7MAAAHjCAYAAADxD0ixAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xl8VfWd//HXSQhhh7ArqKCAgIIo\nCCIoiCKordYNqq2tba2jY2vbabVaq9aF7nVaHVunTtXqjFbUWvVXK4uAqIASUFBAEwSEgKyyr1nO\n748TuAkECJCbk3vv6/l43Ee/n7vljQvl7Vm+QRiGSJIkSZKUSrLiDiBJkiRJ0qGyzEqSJEmSUo5l\nVpIkSZKUciyzkiRJkqSUY5mVJEmSJKUcy6wkSZIkKeVYZiVJkiRJKccyK0mSJElKOZZZSZIkSVLK\nqRd3gEPVunXrsFOnTnHHkCRJkiQlwaxZs9aGYdjmYO9LuTLbqVMn8vPz444hSZIkSUqCIAg+rc77\nPM1YkiRJkpRyLLOSJEmSpJRjmZUkSZIkpZyUu2ZWkiRJkgCKi4spKipix44dcUfRYWjQoAEdO3Yk\nJyfnsD5vmZUkSZKUkoqKimjatCmdOnUiCIK44+gQhGHIunXrKCoqonPnzof1HZ5mLEmSJCkl7dix\ng1atWllkU1AQBLRq1eqIjqpbZiVJkiSlLIts6jrSv3eWWUmSJElSyrHMSpIkSdJh2LBhA3/84x8P\n67MXXnghGzZsOOB77rrrLiZOnHhY358JLLOSJEmSdBgOVGZLSkoO+NlXX32VFi1aHPA99957L+ed\nd95h5ztUpaWlleaD/RoO9X01zbsZS5IkSUp5nW77Z9K+e8kvL6ry+dtuu41PPvmEPn36MHz4cC66\n6CLuvPNO8vLy+OijjygoKOBLX/oSy5YtY8eOHXzve9/j+uuvj/J26kR+fj5btmzhggsuYPDgwUyb\nNo0OHTrw0ksv0bBhQ6699lq+8IUvcMUVV9CpUye+/vWv88orr1BcXMxzzz1H9+7dWbNmDVdffTUr\nVqxg4MCBTJgwgVmzZtG6detKWcePH8/dd9/Nzp07OeGEE3j88cdp0qQJnTp1YvTo0UyYMIFbb72V\nRx55hD59+vDWW29x1VVXcfnll/PNb36TtWvX0qZNGx5//HGOPfZYrr32Who0aMB7773HoEGDeOCB\nB5L2139/PDIrSZIkSYfhl7/8JSeccALvv/8+v/nNbwCYPXs2f/jDHygoKADgscceY9asWeTn5/Pg\ngw+ybt26fb6nsLCQm266iXnz5tGiRQteeOGFKn9e69atmT17NjfeeCO//e1vAbjnnnsYNmwY8+bN\n44orrmDp0qX7fG7t2rXcf//9TJw4kdmzZ9OvX79K5bNVq1bMnj2bL3/5ywDs2rWL/Px8fvjDH/Ld\n736Xr3/968ydO5evfOUr3HzzzXs+V1RUxLRp02IpsuCRWUmSJEmqMf3796+0b+qDDz7Iiy++CMCy\nZcsoLCykVatWlT7TuXNn+vTpA0Dfvn1ZsmRJld992WWX7XnP3//+dwDeeuutPd8/cuRI8vLy9vnc\njBkzmD9/PoMGDQKisjpw4MA9r48ePbrS+yvO06dP3/OzrrnmGm699dY9r1155ZVkZ2fv7y9F0llm\nJUmSJKW8/Z0KXNsaN268Zz1lyhQmTpzI9OnTadSoEUOHDq1yX9Xc3Nw96+zsbLZv317ld+9+X3Z2\n9iFdpxqGIcOHD+eZZ545aOaq5v2p7vuSxdOMJUmSJOkwNG3alM2bN+/39Y0bN5KXl0ejRo346KOP\nmDFjRo1nGDRoEGPHjgWi62LXr1+/z3vOOOMM3n77bRYuXAjA1q1b95wGfTBnnnkmf/vb3wD4v//7\nP84666waSn7kLLOSJEmSdBhatWrFoEGDOPnk
k7nlllv2eX3kyJGUlJTQo0cPbrvtNs4444waz3D3\n3Xczfvx4Tj75ZJ577jnat29P06ZNK72nTZs2PPHEE1x11VX07t2bgQMH8tFHH1Xr+x966CEef/xx\nevfuzVNPPcUf/vCHGv81HK4gDMO4MxySfv36hfn5+XHHkCIfvwbv/hlOuwZOujTuNJIkSRllwYIF\n9OjRI+4Ysdq5cyfZ2dnUq1eP6dOnc+ONN/L+++/HHavaqvp7GATBrDAM+x3ss14zKx2u9Utg7DVQ\nugsWTYH2vaHVCXGnkiRJUgZZunQpo0aNoqysjPr16/Poo4/GHanWWGalwzXh7qjIAoSlMOWXcHnm\n/OYhSZKk+HXt2pX33nsv7hix8JpZ6XB8Oh3m/6Pycx88B6vmxZNHkiRJyjCWWelQlZXBuNsTc7B7\nb60QJo2JJZIkSZKUaSyz0qH6YCysKD+Vo14DGP2/idc+/icUzYonlyRJkpRBLLPSodi1FSbek5gH\n3gTdL6x8J+NJ99Z+LkmSJCnDWGalQzHtIdi8Ilo3aQeDfxCtz7kDgvJ/nRZNgcVTY4knSZKkuq1J\nkyYArFixgiuuuKLK9wwdOpSDbUf6+9//nm3btu2ZL7zwQjZs2FBzQVOAZVaqrk0r4O0Km0QP+ynk\nlm9I3bor9Lk68drr90GK7eEsSZKk2nP00Ufz/PPPH/bn9y6zr776Ki1atKiJaAdVUlJywLm6nztS\nbs0jVdfr90Jx+W8Y7XtBn69Ufn3Ij2Hu2Gi7nqJ3oWAcnDiy9nNKkiRlop81T+J3b6zy6dtuu41j\njjmGm266KXrbz35GkyZNuOGGG7jkkktYv349xcXF3H///VxyySWVPrtkyRK+8IUv8OGHH7J9+3a+\n8Y1vMGfOHLp378727dv3vO/GG29k5syZbN++nSuuuIJ77rmHBx98kBUrVnDOOefQunVrJk+eTKdO\nncjPz6d169Y88MADPPbYYwBcd911fP/732fJkiVccMEFDB48mGnTptGhQwdeeuklGjZsWCnXmjVr\nuOGGG1i6dCkQleZBgwbxs5/9jE8++YRFixZx7LHHMmLECP7+97+zZcsWSktLmTJlCrfeeiv/+te/\nCIKAn/70p4wePZopU6Zw5513kpeXx0cffURBQUGN/W2xzErVsXw2zHkmMY/4OWRlV35Pi2Oh7zfg\n3f+O5kn3QdfzIcsTICRJktLR6NGj+f73v7+nzI4dO5Zx48bRoEEDXnzxRZo1a8batWs544wzuPji\niwmCoMrv+dOf/kSjRo1YsGABc+fO5bTTTtvz2pgxY2jZsiWlpaWce+65zJ07l5tvvpkHHniAyZMn\n07p160rfNWvWLB5//HHeeecdwjBkwIABDBkyhLy8PAoLC3nmmWd49NFHGTVqFC+88AJf/epXK33+\ne9/7Hj/4wQ8YPHgwS5cuZcSIESxYsACA+fPn89Zbb9GwYUOeeOIJZs+ezdy5c2nZsiUvvPAC77//\nPnPmzGHt2rWcfvrpnH322QDMnj2bDz/8kM6dO9fYX3uwzEoHF4Yw7ieJ+cSLoPPZVb/3rB/Ce09F\nR3BXfQjzX4STL6+dnJIkSapVp556KqtXr2bFihWsWbOGvLw8jjnmGIqLi/nJT37C1KlTycrKYvny\n5axatYr27dtX+T1Tp07l5ptvBqB379707t17z2tjx47lz3/+MyUlJXz22WfMnz+/0ut7e+utt7j0\n0ktp3LgxAJdddhlvvvkmF198MZ07d6ZPnz4A9O3blyVLluzz+YkTJzJ//vw986ZNm9iyZQsAF198\ncaUjucOHD6dly5Z7fu5VV11FdnY27dq1Y8iQIcycOZNmzZrRv3//Gi+yYJmVDm7+S7B0erTOyoHz\n79v/e5u2gwE3wFsPRPOkMdDjEsj2XzVJkqSk2s+pwMl25ZVX8vzzz7Ny5UpGjx4NwP/93/+xZs0a\nZs2aRU5O
Dp06dWLHjh2H/N2LFy/mt7/9LTNnziQvL49rr732sL5nt9zc3D3r7OzsSqcz71ZWVsaM\nGTNo0KDBPq/tLsj7m/e
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a61e27450>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot_learning_curve()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 2.10. Obserwacje odstające"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 43,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Funkcja regresji liniowej\n",
|
|||
|
"\n",
|
|||
|
"def h_linear(Theta, x):\n",
|
|||
|
" return x * Theta\n",
|
|||
|
"\n",
|
|||
|
"def linear_regression(theta):\n",
|
|||
|
" return lambda x: h_linear(theta, x)\n",
|
|||
|
"\n",
|
|||
|
"# Wykres krzywej regresji (wersja macierzowa)\n",
|
|||
|
"def plot_regression(fig, fun, theta, X):\n",
|
|||
|
" ax = fig.axes[0]\n",
|
|||
|
" x0 = np.min(X[:, 1]) - 1.0\n",
|
|||
|
" x1 = np.max(X[:, 1]) + 1.0\n",
|
|||
|
" L = [x0, x1]\n",
|
|||
|
" LX = np.matrix([1, x0, 1, x1]).reshape(2, 2)\n",
|
|||
|
" ax.plot(L, fun(theta, LX), linewidth='2',\n",
|
|||
|
" label=(r'$y={theta0:.2}{op}{theta1:.2}x$'.format(\n",
|
|||
|
" theta0=float(theta[0][0]),\n",
|
|||
|
" theta1=(float(theta[1][0]) if theta[1][0] >= 0 else float(-theta[1][0])),\n",
|
|||
|
" op='+' if theta[1][0] >= 0 else '-')))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Wczytanie danych (mieszkania) przy pomocy biblioteki pandas\n",
|
|||
|
"\n",
|
|||
|
"alldata = pandas.read_csv('data_flats_with_outliers.tsv', sep='\\t',\n",
|
|||
|
" names=['price', 'isNew', 'rooms', 'floor', 'location', 'sqrMetres'])\n",
|
|||
|
"data = np.matrix(alldata[['price', 'sqrMetres']])\n",
|
|||
|
"\n",
|
|||
|
"m, n_plus_1 = data.shape\n",
|
|||
|
"n = n_plus_1 - 1\n",
|
|||
|
"Xn = data[:, 0:n]\n",
|
|||
|
"\n",
|
|||
|
"Xo = np.matrix(np.concatenate((np.ones((m, 1)), Xn), axis=1)).reshape(m, n + 1)\n",
|
|||
|
"yo = np.matrix(data[:, -1]).reshape(m, 1)\n",
|
|||
|
"\n",
|
|||
|
"Xo /= np.amax(Xo, axis=0)\n",
|
|||
|
"yo /= np.amax(yo, axis=0)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 45,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmwAAAFoCAYAAADq7KeuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAHV5JREFUeJzt3XuwpGddJ/Dvb5KY6MwUt0QCIRHd\nzBLRlcCOBCS1i6glzLIJKu7EtTRgMCAXUS5rEEsQrTXqIiIEMRUhxLVwKFSMGkDkIqYgwATDJQnZ\nGeMCSQYIl40nQy5M5tk/uoc5mfSZ6TmnT/dzej6fqlPd79vP+76/857unu+8l+ep1loAAOjXulkX\nAADAwQlsAACdE9gAADonsAEAdE5gAwDonMAGANC5mQW2qjq5qt5fVddV1bVV9cIRbaqq/rCqdlbV\nJ6vqMbOoFQBglo6e4bb3JHlxa+3jVbUxydVV9Z7W2nWL2jwlyabhzxlJ/mj4CABwxJjZEbbW2q7W\n2seHzxeSXJ/kpAOanZ3ksjZwVZL7V9VDplwqAMBMdXENW1U9PMmjk3zkgJdOSvL5RdM35b6hDgBg\nrs3ylGiSpKo2JPmLJL/UWvu3Za7j/CTnJ8n69ev/42mnnTbBCgEAVu7qq6/+cmvthOUsO9PAVlXH\nZBDW/qy19pcjmtyc5ORF0w8bzruX1trFSS5Oks2bN7ft27evQrUAAMtXVZ9d7rKzvEu0kvxJkutb\na7+/RLPLk/zs8G7RxyW5rbW2a2pFAgB0YJZH2J6Q5GeSfKqqrhnO+9UkpyRJa+2NSa5IsiXJziRf\nT/LMGdQJADBTMwtsrbUrk9Qh2rQkz5tORQAAferiLlEAAJYmsAEAdE5gAwDonMAGANA5gQ0AoHMC\nGwBA5wQ2AIDOCWwAAJ0T2AAAOiewAQB0TmADAOicwAYA0DmBDQCgcwIbAEDnBDYAgM4JbAAAnRPY\nAAA6J7ABAHROYAMA6JzABgDQOYENAKBzAhsAQOcENgCAzglsAACdE9gAADonsAEAdE5gAwDonMAG\nANA5gQ0AoHMCGwBA5wQ2AIDOCWwAAJ0T2AAAOiewAQB0TmADAOicwAYA0DmBDQCgcwIbAEDnBDYA\ngM4JbAAAnRPYAAA6J7ABAHROYAMA6NxMA1tVvamqvlRVn17i9SdW1W1Vdc3w59enXSMAwKwdPePt\nX5rk9UkuO0ibf2qtPXU65QAA9GemR9haax9M8tVZ1gAA0Lu1cA3b46vqE1X1zqr6nlkXAwAwbbM+\nJXooH0/yHa2126tqS5J3JNl0YKOqOj/J+UlyyimnTLdCAIBV1vURttbav7XWbh8+vyLJMVV1/Ih2\nF7fWNrfWNp9wwglTrxMAYDV1Hdiq6sSqquHzx2ZQ71dmWxUAwHTN9JRoVb01yROTHF9VNyV5RZJj\nkqS19sYkT0/yC1W1J8kdSc5prbUZlQsAMBMzDWyttZ86xOuvz6DbDwCAI1bXp0QBABDYAAC6J7AB\nAHROYAMA6JzABgDQOYENAKBzAhsAQOcENgCAzglsAACdE9gAADonsAEAdE5gAwDonMAGANA5gQ0A\noHMCGwBA5wQ2AIDOCWwAAJ0T2AAAOiewAQB0TmADAOicwAYA0DmBDQCgcwIbAEDnBDYAgM4JbAAA\nnRPYAAA6J7ABAHROYAMA6JzABgDQOYENAKBzAhsAQOcENgCAzglsAACdE9gAADonsAEAdE5gAwDo\nnMAGANA5gQ0AoHMCGwBA5wQ2AIDOCWwAAJ0T2AAAOiewAQB0bqaBrareVFVfqqpPL/F6VdUfVtXO\nqvpkVT1m2jVCtxYWkksuSX7lVwaPCwuzrgiAVXL0jLd/aZLXJ7lsidefkmTT8OeMJH80fIQj25VX\nJlu2JHv3Jrt3J+vXJy96UXLFFcmZZ866OgAm
bKZH2FprH0zy1YM0OTvJZW3gqiT3r6qHTKc66NTC\nwiCsLSwMwloyeNw3//bbZ1sfABPX+zVsJyX5/KLpm4bz4Mi1bdvgyNooe/cOXgdgrvQe2MZSVedX\n1faq2n7rrbfOuhxYXTt27D+ydqDdu5OdO6dbDwCrrvfAdnOSkxdNP2w4715aaxe31ja31jafcMIJ\nUysOZmLTpsE1a6OsX5+ceup06wFg1fUe2C5P8rPDu0Ufl+S21tquWRcFM7V1a7JuiY/uunWD1wGY\nKzO9S7Sq3prkiUmOr6qbkrwiyTFJ0lp7Y5IrkmxJsjPJ15M8czaVQkc2bhzcDXrgXaLr1g3mb9gw\n6woBmLCZBrbW2k8d4vWW5HlTKgfWjjPPTG65ZXCDwc6dg9OgW7cKawBzatb9sAHLtWFDct55s64C\ngCno/Ro2AIAjnsAGANA5gQ0AoHMCGwBA5wQ2AIDOCWwAAJ0T2AAAOiewAQB0TmADAOicwAYA0DmB\nDQCgcwIbAEDnBDYAgM4JbAAAnRPYAAA6J7ABAHROYAMA6JzABgDQOYENAKBzAhsAQOeOnnUBwAot\nLCTbtiU7diSbNiVbtyYbN866KgAmSGCDtezKK5MtW5K9e5Pdu5P165MXvSi54orkzDNnXR0AE+KU\nKKxVCwuDsLawMAhryeBx3/zbb59tfQBMjMAGa9W2bYMja6Ps3Tt4HYC5ILDBWrVjx/4jawfavTvZ\nuXO69QCwagQ2WKs2bRpcszbK+vXJqadOtx4AVo3ABmvV1q3JuiU+wuvWDV4HYC4IbLBWbdw4uBt0\n48b9R9rWr98/f8OG2dYHwMTo1gPWsjPPTG65ZXCDwc6dg9OgW7cKawBzRmCDtW7DhuS88/ZPLywk\nl1yiI12AOSKwwTzRkS7AXHING8wLHekCzC2BDeaFjnQB5pbABvNCR7oAc0tgg3mhI12AuSWwwbzQ\nkS7A3Br7LtGqekCSTUmO2zevtfbB1SgKWIZ9HeYeeJfounU60gVY48YKbFX1rCQvTPKwJNckeVyS\nDyd50uqVBhw2HekCzKVxj7C9MMn3J7mqtfaDVXVakv+5emUBy3ZgR7oArHnjXsN2Z2vtziSpqmNb\na59J8ojVKwsAgH3GPcJ2U1XdP8k7krynqr6W5LOrVxawbAsLg1OihqYCmBvVWju8Bar+c5L7JXlX\na+3uValqBTZv3ty2b98+6zJgNkYNTbXvpgNDUwHMVFVd3VrbvJxlx+7Wo6qOqqqHJvnXDG48OHE5\nGzxgnU+uqhuqamdVXTDi9WdU1a1Vdc3w51kr3SbMLUNTAcytce8SfUGSVyT5YpJ9Y9+0JN+33A1X\n1VFJLkryI0luSvKxqrq8tXbdAU23tdaev9ztwFxbfPrzC19IvvGN0e2+8Y1BOzcjAKxJh3OX6CNa\na1+Z4LYfm2Rna+3GJKmqP09ydpIDAxswyoGnP48+OtmzZ3TbO+9MrvPRAlirxj0l+vkkt0142ycN\n17vPTcN5B/qJqvpkVb29qk4etaKqOr+qtlfV9ltvvXXCZUKHRp3+XCqs7fOVSf5/C4BpGvcI241J\nPlBVf5fkrn0zW2u/vypV7fc3Sd7aWrurqp6d5C0Z0Vlva+3iJBcng5sOVrkmmL1t2wZH1g7Hgx60\nOrUAsOrGDWyfG/58y/BnEm5OsviI2cOG877pgFOwlyT53QltG9a2HTv2H1kbx3HHJY985OrVA8Cq\nGiuwtdZ+I0mq6ttaa1+f0LY/lmRTVX1nBkHtnCT/fXGDqnpIa23XcPKsJNdPaNuwtm3alBx1VHLP\nPeO1P+YYg78DrGFjXcNWVY+vquuSfGY4/aiqesNKNtxa25Pk+UnenUEQe1tr7dqqelVVnTVs9otV\ndW1VfSLJLyZ5xkq2CXNjy5bxwtr69fsHhTeeKMCaNe4p0T9I8qNJLk+S1tonquo/rXTjrbUrklxx\nwLxfX/T8
ZUlettLtwNx57WsP3aYqOf/85FWvEtYA1rixO85trX3+gFljnosBJmphIfmDPzh0u9aS\n17wm+dCHVr8mAFbV2N1
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5ebfa610>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data(Xo, yo, xlabel=u'metraż', ylabel=u'cena')\n",
|
|||
|
"theta_start = np.matrix([0.0, 0.0]).reshape(2, 1)\n",
|
|||
|
"theta, logs = gradient_descent(cost, gradient, theta_start, Xo, yo, alpha=0.01)\n",
|
|||
|
"plot_regression(fig, h_linear, theta, Xo)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 46,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Odrzućmy obserwacje odstające\n",
|
|||
|
"alldata_no_outliers = [\n",
|
|||
|
" (index, item) for index, item in alldata.iterrows() \n",
|
|||
|
" if item.price > 100 and item.sqrMetres > 10]\n",
|
|||
|
"\n",
|
|||
|
"alldata_no_outliers = alldata.loc[(alldata['price'] > 100) & (alldata['sqrMetres'] > 100)]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 47,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"data = np.matrix(alldata_no_outliers[['price', 'sqrMetres']])\n",
|
|||
|
"\n",
|
|||
|
"m, n_plus_1 = data.shape\n",
|
|||
|
"n = n_plus_1 - 1\n",
|
|||
|
"Xn = data[:, 0:n]\n",
|
|||
|
"\n",
|
|||
|
"Xo = np.matrix(np.concatenate((np.ones((m, 1)), Xn), axis=1)).reshape(m, n + 1)\n",
|
|||
|
"yo = np.matrix(data[:, -1]).reshape(m, 1)\n",
|
|||
|
"\n",
|
|||
|
"Xo /= np.amax(Xo, axis=0)\n",
|
|||
|
"yo /= np.amax(yo, axis=0)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 48,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAmwAAAFoCAYAAADq7KeuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3X+U3HV97/HXe38nu7MCJZdAICKX\nKI1XAbsV0RwvVWwhx0KvosHbU4MXDqWEVkE9Qr3Xn/WInuJvwOZGGqAWU7VH0xrlWH9cmiroQkEM\nENjSo+QHGAGzs5v9Nbvv+8f3O9nZ2e/Mzu78+H525vk4Z87OfL/fmfnsTjZ55fN5fz4fc3cBAAAg\nXG1pNwAAAADlEdgAAAACR2ADAAAIHIENAAAgcAQ2AACAwBHYAAAAApdaYDOzU8zsB2b2iJntMbN3\nJlxjZvY5Mxsys5+Z2SvSaCsAAECaOlJ875ykd7v7A2aWkXS/mX3X3R8puOZCSevi2zmSbo2/AgAA\ntIzUetjc/aC7PxDfz0p6VNKaossulnSHR+6VdIyZndjgpgIAAKQqiBo2MztV0tmS7is6tUbSUwWP\n92l+qAMAAGhqaQ6JSpLMrE/S1yW9y92Hl/gaV0q6UpJ6e3t/54wzzqhhCwEAAKp3//33/9rdVy3l\nuakGNjPrVBTWvuzu/5hwyX5JpxQ8Pjk+Noe7b5W0VZIGBgZ8cHCwDq0FAABYOjP7xVKfm+YsUZP0\nJUmPuvunSly2U9Lb49mir5J02N0PNqyRAAAAAUizh+01kv5E0sNm9mB87C8lrZUkd/+ipF2SNkoa\nknRE0jtSaCcAAECqUgts7r5bki1wjUva0pgWAQAAhCmIWaIAAAAojcAGAAAQOAIbAABA4AhsAAAA\ngSOwAQAABI7ABgAAEDgCGwAAQOAIbAAAAIEjsAEAAASOwAYAABA4AhsAAEDgCGwAAACBI7ABAAAE\njsAGAAAQOAIbAABA4AhsAAAAgSOwAQAABI7ABgAAEDgCGwAAQOAIbAAAAIEjsAEAAASOwAYAABA4\nAhsAAEDgCGwAAACBI7ABAAAEjsAGAAAQOAIbAABA4AhsAAAAgSOwAQAABI7ABgAAEDgCGwAAQOAI\nbAAAAIEjsAEAAASOwAYAABA4AhsAAEDgCGwAAACBI7ABAAAEjsAGAAAQOAIbAABA4AhsAAAAgSOw\nAQAABI7ABgAAEDgCGwAAQOAIbAAAAIEjsAEAAASOwAYAABA4AhsAAEDgCGwAAACBSzWwmdltZvYr\nM/t5ifPnmdlhM3swvn2g0W0EAABIW0fK779d0hck3VHmmn919zc2pjkAAADhSbWHzd3vkfRcmm0A\nAAAI3XKoYTvXzB4ys2+b2UuTLjCzK81s0MwGDx061Oj2AQAA1FXoge0BSS909zMlfV7SN5Iucvet\n7j7g7gOrVq1qaAMBAADqLejA5u7D7j4S398lqdPMjk+5WQAAAA0VdGAzs9VmZvH9Vypq77PptgoA\nAKCxUp0lamZ3STpP0vFmtk/SByV1SpK7f1HSJZL+zMxyksYkXerunlJzAQAAUpFqYHP3ty1w/guK\nlv0AAABoWUEPiQIAAIDABgAAEDwCGwAAQOAIbAAAAIEjsAEAAASOwAYAABA4AhsAAEDgCGwAAACB\nI7ABAAAEjsAGAAAQOAIbAABA4AhsAAAAgSOwAQAABI7ABgAAEDgCGwAAQOAIbAAAAIEjsAEAAASO\nwAYAABA4AhsAAEDgCGwAAACBI7ABAAAEjsAGAAAQOAIbAABA4AhsAAAAgSOwAQAABI7ABgAAEDgC\nGwAAQOAIbAAAAIEjsAEAAASOwAYAABA4AhsAAEDgCGwAAACBI7ABAAAEjsAGAAAQOAIbAABA4Ahs\nAAAAgSOwAQAABI7ABgAAEDgCGwAAQOAIbAAA
AIEjsAEAAASOwAYAABA4AhsAAEDgCGwAAACBI7AB\nAAAEjsAGAAAQOAIbAABA4FINbGZ2m5n9ysx+XuK8mdnnzGzIzH5mZq9odBsBpCyblbZtk973vuhr\nNpt2iwCg4TpSfv/tkr4g6Y4S5y+UtC6+nSPp1vgrgFawe7e0caM0MyONjkq9vdJ110m7dkkbNqTd\nOgBomFR72Nz9HknPlbnkYkl3eOReSceY2YmNaR2AVGWzUVjLZqOwJkVf88dHRtJtHwA0UOg1bGsk\nPVXweF98DECz27Ej6llLMjMTnQeAFhF6YKuImV1pZoNmNnjo0KG0mwOgFp54YrZnrdjoqDQ01Nj2\nAECKQg9s+yWdUvD45PjYHO6+1d0H3H1g1apVDWscgDpaty6qWUvS2yudfnpj2wMAKQo9sO2U9PZ4\ntuirJB1294NpNwpAA2zaJLWV+CuqrS06DwAtItVZomZ2l6TzJB1vZvskfVBSpyS5+xcl7ZK0UdKQ\npCOS3pFOSwE0XCYTzQYtniXa1hYd7+tLu4UA0DCpBjZ3f9sC513SlgY1B0BoNmyQDhyIJhgMDUXD\noJs2EdYAtJy012EDgPL6+qTLL0+7FQCQqtBr2AAAAFoegQ0AACBwBDYAAIDAEdgAAAACR2ADAAAI\nHIENAAAgcAQ2AACAwBHYAAAAAkdgAwAACByBDQAAIHAENgAAgMAR2AAAAAJHYAMAAAgcgQ0AACBw\nBDYAAIDAEdgAAAACR2ADAAAIHIENAAAgcAQ2AACAwBHYAAAAAkdgAwAACByBDQAAIHAENgAAgMB1\npN0AAFgWsllpxw7piSekdeukTZukTCbtVgFoEQQ2AFjI7t3Sxo3SzIw0Oir19krXXSft2iVt2JB2\n6wC0AIZEAaCcbDYKa9lsFNak6Gv++MhIuu0D0BIIbABQzo4dUc9akpmZ6DwA1FnFQ6JmdqykdZJ6\n8sfc/Z56NAoAgvHEE7M9a8VGR6Whoca2B0BLqiiwmdkVkt4p6WRJD0p6laQfS3pd/ZoGAAFYty6q\nWUsKbb290umnN75NAFpOpUOi75T0u5J+4e6/J+lsSb+pW6sAIBSbNkltJf6qbGuLzgNAnVUa2Mbd\nfVySzKzb3R+T9JL6NQsAApHJRLNBM5moR02KvuaP9/Wl2z4ALaHSGrZ9ZnaMpG9I+q6ZPS/pF/Vr\nFgAEZMMG6cCBaILB0FA0DLppE2ENQMOYuy/uCWb/XdILJH3H3Sfr0qoqDAwM+ODgYNrNAAAAmMPM\n7nf3gaU8dzGzRNslnSDpP+NDqyX9cilvCgAAgMpVOkv0zyV9UNIzkvILErmkl9epXQCAULAtF5C6\nSnvY3inpJe7+bD0bAwAIDNtyAUGodJboU5IO17MhAIDAsC0XEIxKe9ielPRDM/uWpIn8QXf/VF1a\nBQBIXyXbcl1+eWPbBLSoSgPbL+NbV3wDADQ7tuUCglFRYHP3D0uSma109yP1bRIAIAhsywUEo6Ia\nNjM718wekfRY/PhMM7ulri0DAKSLbbmAYFQ66eAzkv5A0rOS5O4PSXptvRoFAAgA23IBwah44Vx3\nf8rMCg9N1745AICgsC0XEIRKA9tTZvZqSW5mnYrWZXu0fs0CAASjr4/ZoEDKKh0SvUrSFklrJO2X\ndJakq+vVKAAAAMyqtIftJknXuPvzkmRmx8bH/le9GgYACAjbUwGpqjSwvTwf1iTJ3Z83s7Pr1CYA\nQEjYngpIXaVDom1xr5okycyO0yImLJRiZheY2V4zGzKz6xPOX2Zmh8zswfh2RbXvCQBYBLanAoKw\nmCHRH5vZV+PHb5H0sWre2MzaJd0s6Q2S9kn6qZntdPdHii7d4e7XVPNeAIAlYnsqIAiV7nRwh5kN\nSnpdfOhNCcFqsV4pacjdn5QkM/uKpIslVfu6AFoZtVa1xfZUQBAWsw7bI6ptmFoj6amCx/sknZNw\n3ZvN7LWS
Hpd0rbs/lXANANS31urAAemGG6THHpPOOEP6+Melk06qTbtDdsop5c+ffHJj2gG0uEpr\n2NLyT5JOdfeXS/qupNu
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f3a5ebae0d0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"fig = plot_data(Xo, yo, xlabel=u'metraż', ylabel=u'cena')\n",
|
|||
|
"theta_start = np.matrix([0.0, 0.0]).reshape(2, 1)\n",
|
|||
|
"theta, logs = gradient_descent(cost, gradient, theta_start, Xo, yo, alpha=0.01)\n",
|
|||
|
"plot_regression(fig, h_linear, theta, Xo)"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"celltoolbar": "Slideshow",
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 2
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython2",
|
|||
|
"version": "2.7.14"
|
|||
|
},
|
|||
|
"livereveal": {
|
|||
|
"start_slideshow_at": "selected",
|
|||
|
"theme": "amu"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|