2635 lines
217 KiB
Plaintext
2635 lines
217 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Uczenie maszynowe UMZ 2017/2018\n",
|
|||
|
"# 5. Sieci neuronowe"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"%matplotlib inline\n",
|
|||
|
"\n",
|
|||
|
"import math\n",
|
|||
|
"import matplotlib\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import random"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 5.1. Perceptron"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {
|
|||
|
"scrolled": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEP\nERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4ICA4eFBEUHh4eHh4e\nHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/wAARCAFoAeADASIA\nAhEBAxEB/8QAHAAAAQUBAQEAAAAAAAAAAAAAAgABAwQFBgcI/8QAQBAAAQMCBAQEBAQEBQQCAwEA\nAQACEQMEBRIhMQZBUWETInGBMpGhsRRCwdEzcuHwFSM1UmIHJDTxFiVDc4KS/8QAFwEBAQEBAAAA\nAAAAAAAAAAAAAAECA//EAB8RAQEBAAMBAQEAAwAAAAAAAAABEQIhMUESA1Fhcf/aAAwDAQACEQMR\nAD8A+V+BwPxrpaCYOp9CureXaguJ3hclwU7LfuA3g/YrqLioZiYMla/nfUjncWotddkuEGdx6KKl\nUc3Y6LYr0W1CTlBPU6rKrsNN+XMPklrVS062sk6hTi4AEaKgXBoA0JPNO1rid1jEaBbSqfm0Ttsx\nqaen6qhlqU/hkI2XFdpHmIHRbgvi0uPyyVHUo1cvmaY5hFQxKq0QQDHdWqOJWzmEVKYLvX+i1RnP\nzZA12gGwUfPdBdVKlS4c5phpOgB5KE+INZJUE2cdULniRBUeQ/7k4bEyZ9lGUgcnB01MqMuA5FIe\nVNxYkZueSeVHm5z8k4Pcpi9pmu1UrHESoWMdzBU9KmNcxWrRIC7eVPRzuqNJEkmFEzIByWxw1a/j\nL6lSI8oPSeSzYOy4dwOnWwhr6wIJAI26Kapw9Qnyz8x+y37em23tKdu0A5QBI05IhHRTExzJwF7f\nNTkDpIRMw2uwEEH6LpQlDSdQCrjeOcNlWH5SVGKdRv5SupDW8mj5IatrTcPhHyTErmDUdTHmHoib\nWJZK234dSdyB9tlE/DaewMeyYjOY2TAUotyYJVwWZbrP0Sqt8JmZxEd9ExcV2UgBCMU2tOiYVKbi\nMtRuvdEGkiQUxQuAMRyTZQnynZI7wSgAU+cckgORGyIN0nMJ6SkQOsR7qgZhDB6Qj9pShSmonjbW\nU2VSmdNJQz/x+qJgA3RRVGjaSPRWS0BAaYkH9EwxULREAT6qM0SQW5dlcNITM/RJzRuNExZ0z6tm\nyo0B7ZjZUq+EUHknIdfRbRbqT1Q6piuYueG6ZHlza9x+yz6/DDmz4ebvJBXakb7+ijMtB826tjH5\nrga3D1y0GKZPuFn1cJumHzUyB7L0p7QRBbKrPotcZcwEdws4n5sedULGr+Lpscx0Eie2qbHfE/EC\ni1phgH2/qu/qWlGQ5tNoInUAKjc4XQqPLnMaSecJhY8883OU+c8l11bA6ZGjRPZqpV8BjVojf8qY\nrnvGe34SgEyPMtmpgdYCZ0/lVR2F3DdcrjH/ABKliVRqbnXkVj1Pjd6roKljXEkscBB5FYNZpbVc\n08j0Qje4K/1B3odfYrpq85yBrBOq53gj/wA2p6H7FdHXacwiVvh/xZFd5dOhhZeJ0XGqCFseGUxY\nHCDspyi2Obe1zSMx2Th4bzVjFAadYADQkqoHDnopDEza1TdyNlam4+bSFExw2KIU2vBPRVF+lTt6\ngGUwVI7D9Za4ALObTeD5dFM2tXpgDUkK7oa5piiYLpOqr5gTupbqo+4LS9sFsx/fsogyPi2VvQHx\nD1SLi7mUfl6pBzW891kwABPWFKKbolyA1nflCFniO3lTVzEzSwSHJFzOQSp27jJJUoo0xuVfTs4q\nE7KalTqVD5dhukH0W6t1KMV3uBFPbmkX8p6dvp5jBXacAWjTd+IANCfsuKp06lQg7L0jgXy03NHK\nfsru+n5dPUHnJTKQaDuUxTpcJgkqQMTtGql
pt0MrSYANEJy1TZQmLSh+UIbvCjdTM8lO5pQEIflA\nWnos7HQRYudzAMfJazws3Gmh1m6eh+yzhrh6l1cU3EtcYlGMZvKZnPI9B+yC7pNDzvuqjqYTMGpR\n4gqT5xr7K7Sx6lPmaZ9AucyhRua6dFMHYMxO0cRlcrdKvSqguYZ6rgTm3zJCpUafK6FTHoQjklmX\nB0r6uwaO+itUsbuGkFxB9ioWOxOuybKVzdLiI/maD6Aq7Rx+3Il5IPoUwjZAkpOZsqlPErWoYFQD\n1BVlteg5py1GmeS1pp8qiIRgtI0LT7ojAGYbpqq5aEBbqpy4uMnRCWyUnYgLVG9sx2U5TZU1rVYw\nEBAhWyFEW6dFEs1Ve0aQoiw9Fbe0yEDWnVGbNUTSHJA+l5DIk8lo+H6oalPbQoflk+DmHmbISNu1\n27WrT8P1QlpUp+dZbrGm5j8zBo0kfJeVYuzJidw0DQPK9oLf8qp/KfsV4zj3+sXX8/6LOM2Y2OBR\n/wB5UG+h+xXUVW6jTquZ4C81/U9D9iuvq0xA06rf8pe1ioGpvDEaKx4Y6IQpb2tZl5ZNrOB2ieUr\nDvKbaNUsIiOey657TIA5rFxbD31qudsDX9E0Ype2fKPqjzugZZ7wUFWj4Jh5gpmF0ExoUZT0q7hu\nPmVcpXlOTnYJPNZ7HCdVNT8N2k6pBbuKlAsDmRImQOaz31C53wkBXGWpcCQ7RQPa1ji0nUK3wRNo\nucdCfkpG2+X4nD3QsuCBA3QuNSoR5oUaTFtJuhcJTmu3XK0KJlAjVxlSBlNpkqUMK1R5Ia0iOikF\nOo8SSUIrU26NGpRis/8ALsrBPSotafO4DsrDKtKmCGtBncyqrbeo4y5xA9VZt7emXBuaT6rWKtUH\nvewva0gDkvQeAHZsxI6/ZcC53hsFICF3n/T7Z3v9lB1h3idkQahRt3ToSN3CmpDdQt3Cno7FaBZU\nRGiSMhBXqjZAWqZ42TZSrggLTCzsZbFo70K0jmWfjU/hHT0P2URw99/ELQPdVCQTGX6q7fD/ADlT\njSVKI35Z+FRE9BClq7D3UJUUHh/8pSyjl9VIUCtCEc2j5KF0O3aplHlPZZiVF4eUmJEpZT1KkLTo\nlCoHxntPlkehhXMOfVc8/wCY7lzKpOiVpYS0F59kE1e6uaJ8tVxA7lOzHbhplzSR0k/slfsGu/yW\ne9rXDQqUjVZxBqfEpkdPN/RWqOO2rvidlPrK5p9IAGXBRZN4Wortad/bOOlRvzCnFalUHle0xvqN\nFwYzt+Go75ohc3VH4arhKhruiW/7m/NAYXGNxS6b/wDkJ+Sm/wAfuG/Fr7BB1T3NBEqMPaJ5z1XO\nU+ImkxUaR7BWaeOWrh5pB9v3QbXit5uhMajTsQVlf4nbO2cUTLyk4nI4nqg1RlPID3URaOqqsuWx\nMpn3WiVdWnNHhVNd2n7LxXiNsY1dD/n+gXrgvPK+dRBXkXEL82NXLur/ANApjFra/wCnn+pv/l/Q\nrtq5jn1XIf8ATG2qXOKvbTBJykbdivQjhFVzocYg8wtcPGYxTBOqY06g0a1bzcLpU58R7QR1A/dO\n6pYW4zP8M+pCxybYLbe4cQG0ySVctcJuKzoecg9QrdbHMNps8vhyOjgsDEeKnUnk25BE8qn9EhLi\nlxlw+63q+NTJcABzHUrlB5SQ4QVv4jxBcXlMsdIB0nNKwqjSXZiTJM7KwO1zVIxrZl26hACdpdzk\nrfxKt0szT5dVFc03OfnKemX6kEgDklVqVHDRsrFZwNJzWndTuusoGXT2VdrQFICyPMAVWyNSrUOY\nbJNtzGYoxWgRTaD6JEVCeYHRA9NtFurjqpRVgRT+yha1gMuM9lMxzCIY0BA/nc8wMoVy08v+YdS3\nUKmxrnOlzp9lMamVnhgSDodUFku8Wqap1nT+/kvQP+n+hLeUn7Lzui7QQ2IXofAZEggyTOvsg7H8\nxHdSsUR
IzEbnqpGHsgVV7msJjmjtKjnEZuUJn+dpbtKKizLqD05LQuZ9ks3VQwXHeFI/zdk6+kIb\nk9U6ZrdDrKRTYqFyzs
|
|||
|
"text/html": [
|
|||
|
"\n",
|
|||
|
" <iframe\n",
|
|||
|
" width=\"800\"\n",
|
|||
|
" height=\"600\"\n",
|
|||
|
" src=\"https://www.youtube.com/embed/cNxadbrN_aI\"\n",
|
|||
|
" frameborder=\"0\"\n",
|
|||
|
" allowfullscreen\n",
|
|||
|
" ></iframe>\n",
|
|||
|
" "
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
"<IPython.lib.display.YouTubeVideo at 0x7f8808605e10>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from IPython.display import YouTubeVideo\n",
|
|||
|
"YouTubeVideo('cNxadbrN_aI', width=800, height=600)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img style=\"margin: auto\" width=\"80%\" src=\"http://m.natemat.pl/b94a41cd7322e1b8793e4644e5f82683,641,0,0,0.png\" alt=\"Frank Rosenblatt\"/>"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img style=\"margin: auto\" src=\"http://m.natemat.pl/02943a7dc0f638d786b78cd5c9e75742,641,0,0,0.png\" width=\"70%\" alt=\"Frank Rosenblatt\"/>"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img style=\"margin: auto\" width=\"50%\" src=\"https://upload.wikimedia.org/wikipedia/en/5/52/Mark_I_perceptron.jpeg\" alt=\"perceptron\"/>"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Pierwszy perceptron liniowy\n",
|
|||
|
"\n",
|
|||
|
"* Frank Rosenblatt, 1957\n",
|
|||
|
"* aparat fotograficzny podłączony do 400 fotokomórek (rozdzielczość obrazu: 20 x 20)\n",
|
|||
|
"* wagi – potencjometry aktualizowane za pomocą silniczków"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Uczenie perceptronu\n",
|
|||
|
"\n",
|
|||
|
"Cykl uczenia perceptronu Rosenblatta:\n",
|
|||
|
"\n",
|
|||
|
"1. Sfotografuj planszę z kolejnym obiektem.\n",
|
|||
|
"1. Zaobserwuj, która lampka zapaliła się na wyjściu.\n",
|
|||
|
"1. Sprawdź, czy to jest właściwa lampka.\n",
|
|||
|
"1. Wyślij sygnał „nagrody” lub „kary”."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Funkcja aktywacji\n",
|
|||
|
"\n",
|
|||
|
"Funkcja bipolarna:\n",
|
|||
|
"\n",
|
|||
|
"$$ g(z) = \\left\\{ \n",
|
|||
|
"\\begin{array}{rl}\n",
|
|||
|
"1 & \\textrm{gdy $z > \\theta_0$} \\\\\n",
|
|||
|
"-1 & \\textrm{wpp.}\n",
|
|||
|
"\\end{array}\n",
|
|||
|
"\\right. $$\n",
|
|||
|
"\n",
|
|||
|
"gdzie $z = \\theta_0x_0 + \\ldots + \\theta_nx_n$,<br/>\n",
|
|||
|
"$\\theta_0$ to próg aktywacji,<br/>\n",
|
|||
|
"$x_0 = 1$. "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def bipolar_plot():\n",
|
|||
|
" matplotlib.rcParams.update({'font.size': 16})\n",
|
|||
|
"\n",
|
|||
|
" plt.figure(figsize=(8,5))\n",
|
|||
|
" x = [-1,-.23,1] \n",
|
|||
|
" y = [-1, -1, 1]\n",
|
|||
|
" plt.ylim(-1.2,1.2)\n",
|
|||
|
" plt.xlim(-1.2,1.2)\n",
|
|||
|
" plt.plot([-2,2],[1,1], color='black', ls=\"dashed\")\n",
|
|||
|
" plt.plot([-2,2],[-1,-1], color='black', ls=\"dashed\")\n",
|
|||
|
" plt.step(x, y, lw=3)\n",
|
|||
|
" ax = plt.gca()\n",
|
|||
|
" ax.spines['right'].set_color('none')\n",
|
|||
|
" ax.spines['top'].set_color('none')\n",
|
|||
|
" ax.xaxis.set_ticks_position('bottom')\n",
|
|||
|
" ax.spines['bottom'].set_position(('data',0))\n",
|
|||
|
" ax.yaxis.set_ticks_position('left')\n",
|
|||
|
" ax.spines['left'].set_position(('data',0))\n",
|
|||
|
"\n",
|
|||
|
" plt.annotate(r'$\\theta_0$',\n",
|
|||
|
" xy=(-.23,0), xycoords='data',\n",
|
|||
|
" xytext=(-50, +50), textcoords='offset points', fontsize=26,\n",
|
|||
|
" arrowprops=dict(arrowstyle=\"->\"))\n",
|
|||
|
"\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAdMAAAElCAYAAAC/NQipAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XlYlPX+//HXKDBCkEJq7slRsYSK\no+QleEwjLXJtcT1hYouaWRHlUTu4lBqW2sFzyrUFU7sS7CqJXMoMy8qSOqZBmSaQlqZmHksEBe7f\nH32ZX9OoDNwMN8vzcV1c4WeZec/dcL+4l/lgMwxDAACg8hpYXQAAALUdYQoAgEmEKQAAJhGmAACY\nRJgCAGASYQoAgEmEKQAAJhGmAACYRJgCAGASYQrUAjabbZPVNQC4MK8KjmftQcAC3bp1k/j5A6xg\nc2cQR6YAAJhEmAIAYBJhCgCASYQpAAAmEaYAAJhEmAIAYBJhCgCASYQpAAAmEaYAAJhEmAIAYBJh\nCgCASYQpAAAmEaYAAJhEmAIAYBJhCgCASYQp4KZDhw7pwQcfVGRkpPz8/GSz2ZSXl+fW3NLSUiUl\nJal9+/Zq1KiRrr32Wr3++uueLRhAtSFMATft379fqampCgwMVK9evSo0d/r06Zo1a5YmTZqkjRs3\nqkePHho2bJg2bNjgoWoBVCebYRgVGV+hwUBdUlpaqgYNfv/984UXXtB9992n3NxctW/f/qLzjh49\nqrZt22rq1Kl64oknHO033nijjh07pt27d5f73BEREcrKyjJVP4BKsbkziCNTwE1lQVpRmzdv1tmz\nZxUbG+vUHhsbqz179ig3N7cqygNgIcIU8LDs7GzZ7XZ17NjRqT00NFSSlJOTY0VZAKpQhU7z9unT\nx4iLi1NcXJyOHz+uoUOHuoy5//77NWLECB08eFCjR4926X/00Uc1aNAg7d27V+PHj3fpT0xMVN++\nfbVr1y7Fx8e79D/11FOKiorSxx9/rMcff9ylPzk5WeHh4dqyZYvmzJnj0r9s2TJ17txZb731lhYu\nXOjSv2rVKrVt21Zr167VkiVLXPrXrVunpk2bKiUlRSkpKS79GzZskJ+fnxYvXqzU1FSX/szMTEnS\nggULlJGR4dTn6+urjRs3SpJmz56t9957z6n/sssuc9y0Mm3aNH3yySdO/W3atNHq1aslSfHx8dq1\na5dTf0hIiJYvXy5JGjdunL799lun/vDwcCUnJ0v6/ajp0KFDTv2RkZFKSkqSJN1xxx36+eefnfpv\nvPFGTZ8+XZJ0yy236MyZM079AwcO1GOPPSZJ6tOnj/5s+PDhmjhxogoKCtS/f3+X/qp8742ctUIn\n2/SU0dDHZVxNdHhlvFqOSba6DKBatN8x3/G91fu9zMxMTvMCF1KbghRAzccNSKiX2k992+oSKoQj\nU9QnefMGWF3CH7l1ZOrl6SqAmq4yP7gVuZv3lVde0ZgxY7Rv3z6n66YpKSkaO3asDhw4oODg4Is+\nRsSWmcqqWTsYAH/AaV7Aw2JiYuTt7a01a9Y4ta9evVphYWHlBimAmo8jU6AC1q1bJ0n6/PPPJUkb\nN25Us2bN1KxZM/Xu3VuS5OXlpTFjxujFF1+UJDVv3lwJCQlKSkpSQECAunbtqrVr12rr1q1KT0+3\n5oUAqFKEKVABw4YNc/r3xIkTJUm9e/d23KldUlKikpISp3Fz586Vv7+/Fi1apCNHjqhz585KTU3V\nwIEDq6VuAJ5FmAIV4M4Ne+cb07BhQyUmJioxMdETZQGwGNdMAQAwiTAFAMAkwhQAAJMIUwAATCJM\nAQAwiTAFAMAkwhQAAJMIUwAATCJMAQAwiTAFAMAkwhQAAJMIUwAATCJMAQAwiTAFAMAkwhQAAJMI\nUwAATCJMAQAwiTAFAMAkwhQAAJMIUwAATCJM
AQAwiTAFAMAkwhQAAJMIUwAATCJMAQAwiTAFAMAk\nwhQAAJMIUwAATCJMAQAwiTAFAMAkwhQAAJMIUwAATCJMAQAwiTAFAMAkwhQAAJMIUwAATCJMUaVO\nnjyp2bNnKzw8XAEBAQoKClJ0dLQ2bNhgdWkA4DFeVheAuiMzM1N///vfdfjwYaf2999/X5mZmVqy\nZInGjx9vUXUA4DkcmaJKZGZmqn///jp8+LBiY2O1c+dO/fLLL/rss88UGRkpwzCUkJCgQ4cOWV0q\nAFQ5whSm/fTTTxoxYoTOnDmjZ555RqtWrVJERISaNGmi6667TuvXr5e/v78KCgr06quvWl0uAFQ5\nwhSmTZ48WUePHtXAgQM1efJkl/5mzZqpZ8+ekqRt27ZVd3kA4HGEKUz55ptvtGbNGtlsNj3zzDMX\nHNesWTNJUn5+fnWVBgDVhjCFKcuWLVNpaan69u2rq6666oLjzp075/RfAKhLCFNUWmlpqV577TVJ\n0p133nnRsSdOnJAk+fr6erwuAKhuhCkqbdeuXTpy5IgkKS4uTjab7YJf7777riSpbdu2VpZs2sGD\nBzV06FA1btxYl156qW6//XZ9//33bs290LbZtWuXh6sG4Gl8zhSVVpmbiTp16uSBSqpHQUGBoqOj\nZbfbtXLlStlsNiUmJuqGG27Q7t27dckll5T7GHFxcS6ftQ0JCfFUyQCqCWGKSvviiy8kSTfffLPe\nfPPNC47btGmTbrvtNklS165dq6U2T1ixYoUOHDigvXv3qmPHjpKka665Rp06ddKyZcuUkJBQ7mO0\nbt1aPXr08HSpAKoZp3lRad9++60kqV27dmrUqNEFv3bs2OGYc/311zs9xtq1axURESFfX181bdpU\nI0eOVF5eXnW+DLelp6erR48ejiCVpODgYPXs2VPr16+3sDIAViNMUWllqxkFBQVddNzbb78tSbrq\nqqvUrl07R/vSpUs1cuRINWrUSMnJyYqPj9eWLVsUGRlZI1dKys7OVlhYmEt7aGiocnJy3HqMJUuW\nyG63y8/PT9HR0frwww+rukwAFuA0LyrtzJkzkqRGjRpdcMw333yjr776SpI0evRoR/uJEyf0j3/8\nQ127dlVmZqa8vH5/K8bExKh79+5KTExUSkqK54qvhBMnTigwMNClPSgoSL/88ku582NjYzVw4EC1\natVK+fn5mj9/vqKjo/Xuu++qT58+HqgYQHXhyBSV5uPjI0k6ffr0BccsXrxY0u8fiRk7dqyj/c03\n39Svv/6qhx56yBGkkhQREaHrr79eaWlpKiws9FDl1li1apVGjBihXr16KTY2Vtu3b1erVq2UmJh4\n3vHLly9XRESEIiIidOzYsWquFkBFEKaotCuuuEKStHfv3vP25+bmavny5ZKkSZMmqUWLFo6+zz77\nTJIUFRXlMi8qKkoFBQVunzqtLoGBgec9Ar3QEWt5AgICNGDAAO3cufO8/ePGjVNWVpaysrIcK0gB\nqJkIU1Ra7969JUnvvPOOfvzxR6e+06dPa8SIESoqKlJISIhmzpzp1P/DDz9Iktq0aePyuGVtNe26\naWhoqLKzs13ac3Jy1KVLl0o/rs1mM1MWgBqAMEWljR07Vl5eXioqKtLgwYMdf3btnXfeUc+ePbVz\n504FBQUpLS3N5TOYBQUFkiS73e7yuGXXYMvG1BSDBw/Wjh07dODAAUdbXl6ePvroIw0ePLjCj3fq\n1CllZGSoe/fuVVkmAAsQpqi0q666ynHE+fnnn6t79+4KCgrSzTffrC+//FJ/+ctf9N577+maa65x\nmevn5ydJKioqcukru1ZaNqamuO+++9S+fXsNGTJE69evV3p6uoYMGaK2bds6LcSQn58vLy8vPfnk\nk462BQsW6L777tOrr76qzMxMrVy5Uj179tSRI0c0d+5cK14OgCpEmMKUxMRErV27VpGRkbr00ksV\nEBCgbt26
6ZlnntGePXsUHh5+3nmtW7eWdP5TuWVt5zsFbKVLLrlEW7duVUhIiEaPHq0777xTwcHB\n2rp1q/z9/R3jDMNQSUm
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f8808605750>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"bipolar_plot()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Perceptron – schemat\n",
|
|||
|
"\n",
|
|||
|
"<img src=\"perceptron.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Perceptron – zasada działania\n",
|
|||
|
"\n",
|
|||
|
"1. Ustal wartości początkowe $\\theta$ (wektor 0 lub liczby losowe blisko 0).\n",
|
|||
|
"1. Dla każdego przykładu $(x^{(i)}, y^{(i)})$, dla $i=1,\\ldots,m$\n",
|
|||
|
" * Oblicz wartość wyjścia $o^{(i)}$:\n",
|
|||
|
" $$o^{(i)} = g(\\theta^{T}x^{(i)}) = g(\\sum_{j=0}^{n} \\theta_jx_j^{(i)})$$\n",
|
|||
|
" * Wykonaj aktualizację wag (tzw. _perceptron rule_):\n",
|
|||
|
" $$ \\theta := \\theta + \\Delta \\theta $$\n",
|
|||
|
" $$ \\Delta \\theta = \\alpha(y^{(i)}-o^{(i)})x^{(i)} $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"$$\\theta_j := \\theta_j + \\Delta \\theta_j $$\n",
|
|||
|
"\n",
|
|||
|
"Jeżeli przykład został sklasyfikowany **poprawnie**:\n",
|
|||
|
"\n",
|
|||
|
"* $y^{(i)}=1$ oraz $o^{(i)}=1$ : $$\\Delta\\theta_j = \\alpha(1 - 1)x_j^{(i)} = 0$$\n",
|
|||
|
"* $y^{(i)}=-1$ oraz $o^{(i)}=-1$ : $$\\Delta\\theta_j = \\alpha(-1 - -1)x_j^{(i)} = 0$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Czyli: jeżeli trafiłeś, to nic nie zmieniaj."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"$$\\theta_j := \\theta_j + \\Delta \\theta_j $$\n",
|
|||
|
"\n",
|
|||
|
"Jeżeli przykład został sklasyfikowany **niepoprawnie**:\n",
|
|||
|
"\n",
|
|||
|
"* $y^{(i)}=1$ oraz $o^{(i)}=-1$ : $$\\Delta\\theta_j = \\alpha(1 - -1)x_j^{(i)} = 2 \\alpha x_j^{(i)}$$\n",
|
|||
|
"* $y^{(i)}=-1$ oraz $o^{(i)}=1$ : $$\\Delta\\theta_j = \\alpha(-1 - 1)x_j^{(i)} = -2 \\alpha x_j^{(i)}$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Czyli: przesuń wagi w odpowiednią stronę."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Perceptron – zalety i wady\n",
|
|||
|
"\n",
|
|||
|
"Zalety:\n",
|
|||
|
"* intuicyjny i prosty\n",
|
|||
|
"* łatwy w implementacji\n",
|
|||
|
"* jeżeli dane można liniowo oddzielić, algorytm jest zbieżny w skończonym czasie"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Wady:\n",
|
|||
|
"* jeżeli danych nie można oddzielić liniowo, algorytm nie jest zbieżny"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def plot_perceptron():\n",
|
|||
|
" plt.figure(figsize=(12,3))\n",
|
|||
|
"\n",
|
|||
|
" plt.subplot(131)\n",
|
|||
|
" plt.ylim(-0.2,1.2)\n",
|
|||
|
" plt.xlim(-0.2,1.2)\n",
|
|||
|
"\n",
|
|||
|
" plt.title('AND')\n",
|
|||
|
" plt.plot([1,0,0], [0,1,0], 'ro', markersize=10)\n",
|
|||
|
" plt.plot([1], [1], 'go', markersize=10)\n",
|
|||
|
"\n",
|
|||
|
" ax = plt.gca()\n",
|
|||
|
" ax.spines['right'].set_color('none')\n",
|
|||
|
" ax.spines['top'].set_color('none')\n",
|
|||
|
" ax.xaxis.set_ticks_position('none')\n",
|
|||
|
" ax.spines['bottom'].set_position(('data',0))\n",
|
|||
|
" ax.yaxis.set_ticks_position('none')\n",
|
|||
|
" ax.spines['left'].set_position(('data',0))\n",
|
|||
|
"\n",
|
|||
|
" plt.xticks(np.arange(0, 2, 1.0))\n",
|
|||
|
" plt.yticks(np.arange(0, 2, 1.0))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
" plt.subplot(132)\n",
|
|||
|
" plt.ylim(-0.2,1.2)\n",
|
|||
|
" plt.xlim(-0.2,1.2)\n",
|
|||
|
"\n",
|
|||
|
" plt.plot([1,0,1], [0,1,1], 'go', markersize=10)\n",
|
|||
|
" plt.plot([0], [0], 'ro', markersize=10)\n",
|
|||
|
"\n",
|
|||
|
" ax = plt.gca()\n",
|
|||
|
" ax.spines['right'].set_color('none')\n",
|
|||
|
" ax.spines['top'].set_color('none')\n",
|
|||
|
" ax.xaxis.set_ticks_position('none')\n",
|
|||
|
" ax.spines['bottom'].set_position(('data',0))\n",
|
|||
|
" ax.yaxis.set_ticks_position('none')\n",
|
|||
|
" ax.spines['left'].set_position(('data',0))\n",
|
|||
|
"\n",
|
|||
|
" plt.title('OR')\n",
|
|||
|
" plt.xticks(np.arange(0, 2, 1.0))\n",
|
|||
|
" plt.yticks(np.arange(0, 2, 1.0))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
" plt.subplot(133)\n",
|
|||
|
" plt.ylim(-0.2,1.2)\n",
|
|||
|
" plt.xlim(-0.2,1.2)\n",
|
|||
|
"\n",
|
|||
|
" plt.title('XOR')\n",
|
|||
|
" plt.plot([1,0], [0,1], 'go', markersize=10)\n",
|
|||
|
" plt.plot([0,1], [0,1], 'ro', markersize=10)\n",
|
|||
|
"\n",
|
|||
|
" ax = plt.gca()\n",
|
|||
|
" ax.spines['right'].set_color('none')\n",
|
|||
|
" ax.spines['top'].set_color('none')\n",
|
|||
|
" ax.xaxis.set_ticks_position('none')\n",
|
|||
|
" ax.spines['bottom'].set_position(('data',0))\n",
|
|||
|
" ax.yaxis.set_ticks_position('none')\n",
|
|||
|
" ax.spines['left'].set_position(('data',0))\n",
|
|||
|
"\n",
|
|||
|
" plt.xticks(np.arange(0, 2, 1.0))\n",
|
|||
|
" plt.yticks(np.arange(0, 2, 1.0))\n",
|
|||
|
"\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArMAAADJCAYAAAAwwbqVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFkxJREFUeJzt3X9w1Hedx/HXO7ggIbc0tFx7tmfh\nAH8AB9bGG7BSbKt3MNdmOhj8MQdqO9Z2MnJeyOjoHfXsDHPlqhxU53rl8AcYblS6dYSxRY/rSVUs\no6TVOEEqBFKg2iqlV0hi2y187o/PF9hsNsl+k3x395M8HzPf2fL9fva7n/32886+9vv97vdrzjkB\nAAAAIaoqdwcAAACAoSLMAgAAIFiEWQAAAASLMAsAAIBgEWYBAAAQLMIsAAAAgkWYBQAAQLAIs2Vg\nZg+bmTOzgwO0cdH0pJlZgeV10fItefO35DzXmVnWzE6Z2a+iZTeb2bgE3hYwppjZe80sY2bPmtkr\nZnbSzH5oZh83s9cVaN+ZV5tnzewPZrbLzP6mHO8BGC3MrDmqq3/vZ/lfmFmXmR03s8l5y95iZpvM\n7LCZ/dHMTkefvZ83s0v6Wd+2vHo+Z2YvmdkTZnaXmZGvSsi4aUJpmdllkn4r6XWSTNK7nHN7C7TL\n/R/zIefct/KW10n6uaStzrmP5szfIukjkjZHr1MlKS3prZIWSZoo6WeSPuCc6xyp9wWMFVFQ3STp\ndkldkr4n6aikSyUtlfTnkvZLutk593zO8zolvUHSv0SzJkiaLelm+Tpd6ZzbVpp3AYwuUXj8saSF\nkm5yzv0wZ5lJ2iPpeklLnHM/yFnWKOlL0T93S2qTr83Fkt4m6feSbnHO/Szv9bZJ+jv5vwXPydfw\n1ZKWSaqRtMk5d9eIv1EU5pxjKuEkqUmSk/TF6PGr/bRzkn4nqVvSIUmvy1teF7XZkjd/SzS/rsA6\nL5XUEi0/KKmm3NuDiSm0SdK/RTX0hKQr8pZNkPQfOctTOcs6JXUVWN/7o/bPlPu9MTGFPEl6k6Qe\n+S+XNTnz/yGqsc157ZflfNZeW2B9d0h6TdJJSVflLdsWPfdtefNnRZ/b5yRdXe5tMlYmdoOX3m2S\nzkj6nKR2Se83s0n9tH1B0v2SZsoX1bA4516Q9GFJ/yPpzZI+Mdx1AmOJmb1Z0ifla7PeOfdc7nLn\n3CuSGiX9SNIC+aMkg3lIfg/vG81s6sj2GBg7nHO/kbRG0jRJX5AkM3uT/NGQY5Kaz7c1s5T856vk\nj1S2FljfZkn3ye8IWltkHw5J+on8kde3D/GtICbCbAmZ2Tsk/aWk7zjneuT3ktbI75npz79KOiXp\nc2ZWPdw+OP/V8fxhzoFeF0BfH5H/u/mfzrk/FGqQV2O3xVx/dhh9AyBtlPRTSXea2V9L2ip/et3H\nnHOnc9q9V9JVkn7snPvRAOv7oqRXJX3QzF4fsy/Uc4kQZkvr9uixJXr8L/lDEbcXbi45516StE7S\nFfKHSkbCT+UPncwv9EMVAP1aGD0+Nki7H8nX2DuK+MHlcvkvte3Ouf8bZv+AMc05d07+S+TLknbK\nHyHZ5Jzbnde0qFp2zp2S9Av5U4gG3dNqZrMkvUvSK/LnzqMECDIlEn2j+6CkZyX9UJKccyfMbI+k\nG81sVnR4opAvS/p7SZ82swej4hoy59wrZvaCpMslTZE/wR3A4K6IHk8M1Mg598ecGrtUF2tsvJl9\n/vx/y/8A7Bb5c+waR7y3wBjknPuNmW2VdJf8kc1PFWhWVC1Hjkv6q5zn5LrLzM7/AOyNkt4nqVpS\nU/5pSEgOYbZ03ifpEvkT0M/lzG+RdKP83tnPFnqic+5lM7tH/goFn1XhwgRQ+VKS/jlvXo/8L6x/\nUob+AKOOmV0tf6UBye+wuV7SIwm93J0F5n3S
OfelAvOREE4zKJ3zpxLkX3rnYfkPsw8Pcjjy6/JX\nIPiEmV01nI6Y2QT5vUVn5b+1AijO+T0tA9ZgdCTmUvlz7V7IWdTtnDPnnEmaLOkD8qcaPTzcugZw\n4TJcX5X0J/Kn5v1R0qb8a8uqyFrOa/O7Asuuiep5ovzlLw9KWh+dr4sSIcyWgJlNk3RD9M9f5l5o\nWdJp+UMSb5C0pL91OOfOyv9K8/WSPj/MLr1Tfq/8L51zrw1zXcBY8kT0eNMg7a6Xr7GfR7Xbh3Pu\ntHNuu6SPSfpTSQUv9g4glrvk63OLc+5+Sf8k6UpJG/LaFVXLZlYrf73ZVyQ91V8759zL0dGVm+V3\nFH3VzCYO6R0gNsJsadwmf5mOx+W/MeZP34na9ftDMElyzj0sf8ODj0p6y1A6En1r/cfon98eyjqA\nMewb8teWvMPMLi3UIKqx86cMfX2wFTrnvi1pn6R6M3vnSHUUGGuiHUf3yd8wqCmafb+kvZJuy7vT\n3m7537AsMrN3DbDaZvkff33TOffyYH1wznXI/87lKkmrYr4FDBFhNmHRXUk+In8ocYVz7mP5k/wl\nsn4n6ZboDmED+aykcRrC3lkzmyJ/mZL3SHpa0gNx1wGMZc65g/J3C7pM0g4zuzx3uZmNl/8ge7d8\nQP1Gkau+J+8RQAzRl8ivyV8Z5OPnrwwS/UbldvnTDTabWTqan9XFwLvdzK4psM7bJX1G/lShu2N0\n5wvypw9+ysxqhvaOEAc/AEveTfK3uPuBc67gryadc2fNrEXSpyWtVN/DIblt/9fM/lvSYOfjfNzM\nbpbfI5yW35O7WL1vZ9sV980A0Kck1crfgOSQmeXfzvaNklol3Rp9YA7KOfd9M/uZpPeY2SLn3I+T\n6TowajXKn863xTnX68de0dUN7pa/ZuwXFP1oyzn3kJl9Uv6ufj+PPlvb5K808m5J1+ji7WyLuerB\n+df7vZltkg/LqyTdO8z3hkGYv743kmJm35S/JNeHnHPfGqDdWyT9WtKvnHPzovNp251zcwu0fbv8\n9etM0lbn3Edzlm1R77sOnZW/49gJ+Q/YjKRH866oACCm6JDlnfLXsbxM/i5ebZK+Kelr+UHWzDol\nXeacK7inxsz+VtL3JO1xzt1QqA2AvsxsuqRfSXpJ0pxC12uOjpL+RP76su9xzj2Ws2y2fPC8Uf78\n2qykw/LXqd3onHuxwPq2yV8x4Rrn3C8KLL9C/ktuj6Rpzrkzw32f6B9hFgAAAMHinFkAAAAEizAL\nAACAYBFmAQAAECzCLAAAAIIVN8y6Uk5Lliwp6esxMQ1hqmQl3RbUK1MAUyUr6bagXpkCmIpW0Xtm\nT548We4uACgS9QqEg3rFaFLRYRYAAAAYCGEWAAAAwSLMAgAAIFiEWQAAAASLMAsAAIBgEWYBAAAQ\nLMIsAAAAgkWYBQAAQLAIswAAAAgWYRYAAADBIswCAAAgWIRZAAAABIswCwAAgGARZgEAABAswiwA\nAACCRZgFAABAsBINsydOnNCqVau0cOFCVVdXy8zU2dmZ5EsCGAZqFggH9Qp4iYbZw4cPa/v27aqt\nrdWiRYuKe1JHh9TYKKXTUmurf2xs9POBAjpOdajxkUal702r6p4qpe9Nq/GRRnWcYszEFbdmc7d9\n629b2fYYFPU6cqhXJC43k1VVVWwmM+dcnPaxGp87d05VVT4vf+UrX9Edd9yho0ePatq0aYWfsGuX\n1NAgZbNSNqs6SfslKZXyUyYjLV0apwsY5XYd2qWGhxqUPZtV9lz2wvxUVUqpcSlllme0dFaiY8aS\nXPkwxapXKV7N9tn2myTdWdJtj8BQrwOiXlFZ8jLZBaXLZEXXa6J7Zs8XWVE6OvxG6+npvdEk/++e\nHr+8wr4NoHw6TnWo4aEG9WR7en0wSlL2XFY92R41PNTAXocYiq1Ztj3iYsyMPOoViQksk1XOD8DW\nr++7wfJl
s9KGDaXpDyre+ifWK3t24DGTPZvVhn2MmZHGtkdcjJnyYdsjtsAyWeWE2W3bittwLS2l\n6Q8q3ra2bX32MuTLnsu
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f880643ba90>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot_perceptron()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Funkcje aktywacji\n",
|
|||
|
"\n",
|
|||
|
"Zamiast funkcji bipolarnej możemy zastosować funkcję sigmoidalną jako funkcję aktywacji."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def plot_activation_functions():\n",
|
|||
|
" plt.figure(figsize=(16,7))\n",
|
|||
|
" plt.subplot(121)\n",
|
|||
|
" x = [-2,-.23,2] \n",
|
|||
|
" y = [-1, -1, 1]\n",
|
|||
|
" plt.ylim(-1.2,1.2)\n",
|
|||
|
" plt.xlim(-2.2,2.2)\n",
|
|||
|
" plt.plot([-2,2],[1,1], color='black', ls=\"dashed\")\n",
|
|||
|
" plt.plot([-2,2],[-1,-1], color='black', ls=\"dashed\")\n",
|
|||
|
" plt.step(x, y, lw=3)\n",
|
|||
|
" ax = plt.gca()\n",
|
|||
|
" ax.spines['right'].set_color('none')\n",
|
|||
|
" ax.spines['top'].set_color('none')\n",
|
|||
|
" ax.xaxis.set_ticks_position('bottom')\n",
|
|||
|
" ax.spines['bottom'].set_position(('data',0))\n",
|
|||
|
" ax.yaxis.set_ticks_position('left')\n",
|
|||
|
" ax.spines['left'].set_position(('data',0))\n",
|
|||
|
"\n",
|
|||
|
" plt.annotate(r'$\\theta_0$',\n",
|
|||
|
" xy=(-.23,0), xycoords='data',\n",
|
|||
|
" xytext=(-50, +50), textcoords='offset points', fontsize=26,\n",
|
|||
|
" arrowprops=dict(arrowstyle=\"->\"))\n",
|
|||
|
"\n",
|
|||
|
" plt.subplot(122)\n",
|
|||
|
" x2 = np.linspace(-2,2,100)\n",
|
|||
|
" y2 = np.tanh(x2+ 0.23)\n",
|
|||
|
" plt.ylim(-1.2,1.2)\n",
|
|||
|
" plt.xlim(-2.2,2.2)\n",
|
|||
|
" plt.plot([-2,2],[1,1], color='black', ls=\"dashed\")\n",
|
|||
|
" plt.plot([-2,2],[-1,-1], color='black', ls=\"dashed\")\n",
|
|||
|
" plt.plot(x2, y2, lw=3)\n",
|
|||
|
" ax = plt.gca()\n",
|
|||
|
" ax.spines['right'].set_color('none')\n",
|
|||
|
" ax.spines['top'].set_color('none')\n",
|
|||
|
" ax.xaxis.set_ticks_position('bottom')\n",
|
|||
|
" ax.spines['bottom'].set_position(('data',0))\n",
|
|||
|
" ax.yaxis.set_ticks_position('left')\n",
|
|||
|
" ax.spines['left'].set_position(('data',0))\n",
|
|||
|
"\n",
|
|||
|
" plt.annotate(r'$\\theta_0$',\n",
|
|||
|
" xy=(-.23,0), xycoords='data',\n",
|
|||
|
" xytext=(-50, +50), textcoords='offset points', fontsize=26,\n",
|
|||
|
" arrowprops=dict(arrowstyle=\"->\"))\n",
|
|||
|
"\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA5IAAAGRCAYAAAAXeoyVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3XlYlPXex/HPALKjgOCSKwpu4I7m\ncsxSK4/rqaz0pE/aomZppi1Wbsc0ray0U25tltVJs0yzNFMz23zMXcFdUdEQFFFkh7mfPzyO8aAC\nMnAPM+/XdXHp73cv8wVHvny4N4thGAIAAAAAoKjczC4AAAAAAFC+ECQBAAAAAMVCkAQAAAAAFAtB\nEgAAAABQLARJAAAAAECxECQBAAAAAMVCkAQAAAAAFAtBEgAAAABQLARJAAAAAECxeBRzfaNUqgCc\nTPfu3bV69WqzywDKA4vZBTgBejNQBPRmoMiK1Js5IgmUgjNnzphdAgAA+At6M2BfBEkAAAAAQLEQ\nJAEAAAAAxUKQBAAAAAAUC0ESAAAAAFAsBEkAAAAAQLEQJAEAAAAAxUKQBAAAAAAUC0ESAAAAAFAs\nBEkAAAAAQLEQJAEAAAAAxUKQBAAAAAAUC0ESAAAAAFAsBEkAAAAAQLEQJAEAAAAAxUKQBAAAAAAU\nC0ESAAAAAFAsBEkAAAAAQLEQJAEAAAAAxUKQBAAAAAAUC0ESAAAAAFAsBEkAAAAAQLEQJAEAAAAA\nxUKQBAAAAAAUC0ESAAAAAFAsBEk4rfj4eI0cOVLt27eXr6+vLBaL4uLiirSt1WrV9OnTVbduXXl7\ne6t58+b68ssvS7dgAACcHL0ZcB4ESTitQ4cOacmSJQoKClKnTp2Kte2ECRM0efJkPfHEE1q1apXa\ntWune++9V999910pVQsAgPOjNwPOw2IYRnHWL9bKgJmsVqvc3C79ruS9997To48+qqNHj6pu3brX\n3S4xMVG1atXSuHHj9K9//cs237VrVyUlJWnXrl2FvnZ0dLS2bNlSovoBF2ExuwAnQG9GuUFvBsqF\nIvVmjkjCaV1uVMX1/fffKzs7WwMHDsw3P3DgQO3evVtHjx61R3kAALgcejPgPEw5InnrrbcWmLvv\nvvs0YsQIpaenq0ePHgWWDx48WIMHD9aZM2fUr1+/Assfe+wx3X///Tpx4oQGDRpUYPnYsWPVu3dv\n7d+/X8OGDSuwfPz48erWrZt27Nih0aNHF1j+8ssvq0OHDvrtt9/0wgsvFFg+a9YstWjRQmvXrtXU\nqVMLLJ8/f74aNmyob775Rq+//nqB5YsWLVKtWrW0ePFizZ07t8DypUuXKiQkRAsXLtTChQsLLP/u\nu+/k6+urOXPmaMmSJQWWb9iwQZI0c+ZMrVy5Mt8yHx8frVq1SpL00ksvad26dfmWV65c2XYNwvPP\nP6/ff/893/KaNWvqk08+kSSNHj1aO3bsyLe8QYMGWrBggSRp6NChOnDgQL7lLVq00KxZsyRdagjx\n8fH5lrdv317Tp0+XJN1zzz06e/ZsvuVdu3bVhAkTJEl///vflZGRkW95r169FBgYqEcffVQ333yz\nvL29JUnnq0crpWZHGe6esrc/Pxqt6g/Osvt+AUcSN6OnPXbDEcmSozfTm52mN1/2/997R44cUXx8\nvG655RZJV957a9as0Z133qmoqChVrlzZtv3V3ntbt25V69atJfHec/X33tNPPy3Jub7vXf43tZMi\n9WYPe74iUJ6UVogEAAD2lZubKw+Pgj+2BgYG2pZfzaeffqqtW7dKknJyckqvQMAFcY0kXMLVrsOo\nO+7bUns9jkjCFXBE0mHQm1EuFecayaFDh2rFihVKSEjIN3/o0CFFRETo448/vurRn7/iGkmgyDgi\nCRTVX38gfu655zR79mxlZGTIYrny/2jz5s26
+eabtXLlSvXsef0foKPXTtIW+/yQDQCAywsKClJK\nSooMw8jXm5OTkyVJwcHBZpUGlLnsXKuS07J1Ni1L59JydDYtS8lp2WpeK1CtageVWR0ESeD/iYyM\nVFZWlg4fPqzw8HDbfGxsrCSpSZMmZpUGAIBLojfD2RmGofMZOTp9IUunL2Qq4UKmklKz8n2cuXjp\n40Lm1U/lHtUlnCAJmKl79+6qUKGCPv30U02aNMk2/8knnygqKkphYWEmVgcAgOuhN6O8y8zJ08mU\nDJ08l2H781RKhv48n6k/z1/6MyvXWqLXOJuWbadqi4YgCae2dOlSSbJdaL9q1SqFhoYqNDQ033oP\nP/yw3n//fUlSlSpVNGbMGE2fPl0BAQFq1aqVFi9erPXr12vFihVl+wkAAOBkrtebO3fuLEny8PDQ\ngw8+SG9GuZKSnq0jZ9J0/Gy64s6m6djZdB1PTteJ5HQlpmbZ7XXcLFKwn5cq+3kq+C8fN9cr21O8\nCZJwavfee2++8YgRIyTpUqNq94xtPi8vL99606ZNk7+/v2bPnq2EhAQ1bNhQS5YsUa9evUq/aAAA\nnNj1evPlRxjk5eXRm+GQrFZD8ecydDAxVYcSL+pQ4kUdOZOmI0kXdS69ZHcG9vN0V9VK3qoa4K1q\nlbwVGuClKgFeCg3wUqi/l0ICvBTi76VAnwpyczP/XnXctRUu6693bbXT3SdtuDMcUGTmd8Lyj94M\nFAG9GcWVkp6t2FMXFPvnBe1PSNX+06k6cDpVmTnFPwXVzSJVr+SjmkE+qhHko5qBProp0EfVA31U\nvZK3qlfyVoB3hVL4LG4Id20FAAAAgMIkp2VrZ3yKdsef1+6T5xV76oJOpmQUax/eFdxUt7Kf6lb2\nU50QX9Wt7Kfawb6qFeSr6oHequDuVkrVm4MgCQAAAMBlZOdaFXPqvLYfT9G24+e040SK4s8VPTRW\n9vNUeBV/RVT1V3iov+pX8Ve9UH9Vr+jtEKeclhWCJAAAAACndTErV1uPndMfR5O1OS5ZO0+kFOkO\nqZ7ubmpYLUCNqweoUbWKalQtQA2rBaiyv1cZVO34CJIAAAAAnEZmTp62Hjun3w6f0e+Hz2pn/Hnl\nWa9/Obmnh5uaVK+o5jUrqWnNQEXVqKj6of5OdzqqPREkAQAAAJRbhmHoUOJF/XQgSRsPntH/Hjlb\n6BHHOpV91ap2kFrWDlTLWkFqVD2A0FhMBEkAAAAA5UpWbp7+90iy1u9L1Lp9p3Ui+drXOFosUqNq\nFXVzWLDa1A1Wm7pBqlLRuwyrdU4ESQAAAAAOLy0rVxv2J2l1TIJ+3Jeoi1m511y3Xoif/hYRog71\nK+vmsMoK8vMsw0pdA0ESAAAAgENKz87Vur2J+mbnKW04kKTsa5yy6u/lob+Fh+iWBqG6pUGIagb5\nlnGlrocgCQAAAMBh5OZZtfFgkr7efkpr955WenbeVderU9lXXRtVVdfGVdSmbrA8PbjGsSwRJAEA\nAACYLvbUBX21LV5f7zilMxezrrpOo2oB6h5VTd2jqqlh1QBZLK7z3EZHQ5AEAAAAYIq0rFx9s/OU\n/rP5uHbGn7/qOuFV/NWn+U3q1ay66oX6l3GFuBaCJAAAAIAytT8hVR/9Hqfl208q7SqnrlYJ8NJd\nLWuob4saalydI4+OiCAJAAAAoNTl5lm1dm+iFv52VJuOJBdY7unupjujqqlf65rqWL+yPHiuo0Mj\nSAIAAAAoNenZuVryxwm9/+vRqz7vMbyKvwa0ra27W9bgMR3lCEESAAAAgN2dvZilj36L08ebjikl\nPSffMnc3i+6MrKoH29dV27BgTl0thwiSAAAAAOwmMTVT7248ok82HVdGTv7rHwN9K+iBm2vrgZvr\n6KZAH5MqhD0QJAEAAACUWGJqpub8eFj/2XxcWbnWfMtqB/vqkU5h6te6pnw9iSDOgH9FAAAAADfs\nfHqO5m08
rA9/ParMnPwBsnH1inr8tvrqHlmNm+c4GYIkAAAAgGLLzMnT+78c1byfDis1MzffsqY1\nKmlU1wh1a1yF6x+dFEE
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f88064ca690>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot_activation_functions()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Perceptron a regresja liniowa"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img src=\"reglin.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Uczenie regresji liniowej:\n",
|
|||
|
"* Model: $$h_{\\theta}(x) = \\sum_{i=0}^n \\theta_ix_i$$\n",
|
|||
|
"* Funkcja kosztu (błąd średniokwadratowy): $$J(\\theta) = \\frac{1}{m} \\sum_{i=1}^{m} (h_{\\theta}(x^{(i)}) - y^{(i)})^2$$\n",
|
|||
|
"\n",
|
|||
|
"* Po obliczeniu $\\nabla J(\\theta)$, zwykły SGD."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Perceptron a dwuklasowa regresja logistyczna"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"<img src=\"reglog.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Uczenie dwuklasowej regresji logistycznej:\n",
|
|||
|
"* Model: $$h_{\\theta}(x) = \\sigma(\\sum_{i=0}^n \\theta_ix_i) = P(1|x,\\theta)$$\n",
|
|||
|
"* Funkcja kosztu (entropia krzyżowa): $$\\begin{eqnarray} J(\\theta) &=& -\\frac{1}{m} \\sum_{i=1}^{m} [y^{(i)}\\log P(1|x^{(i)},\\theta) \\\\ && + (1-y^{(i)})\\log(1-P(1|x^{(i)},\\theta))]\\end{eqnarray}$$\n",
|
|||
|
"\n",
|
|||
|
"* Po obliczeniu $\\nabla J(\\theta)$, zwykły SGD."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Perceptron a wieloklasowa regresja logistyczna"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img src=\"multireglog.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
    "### Wieloklasowa regresja logistyczna\n",
|
|||
|
"* Model (dla $c$ klasyfikatorów binarnych): \n",
|
|||
|
"$$\\begin{eqnarray}\n",
|
|||
|
"h_{(\\theta^{(1)},\\dots,\\theta^{(c)})}(x) &=& \\mathrm{softmax}(\\sum_{i=0}^n \\theta_{i}^{(1)}x_i, \\ldots, \\sum_{i=0}^n \\theta_i^{(c)}x_i) \\\\ \n",
|
|||
|
"&=& \\left[ P(k|x,\\theta^{(1)},\\dots,\\theta^{(c)}) \\right]_{k=1,\\dots,c} \n",
|
|||
|
"\\end{eqnarray}$$\n",
|
|||
|
    "* Funkcja kosztu (**przyjmując model regresji binarnej**): $$\\begin{eqnarray} J(\\theta^{(k)}) &=& -\\frac{1}{m} \\sum_{i=1}^{m} [y^{(i)}\\log P(k|x^{(i)},\\theta^{(k)}) \\\\ && + (1-y^{(i)})\\log P(\\neg k|x^{(i)},\\theta^{(k)})]\\end{eqnarray}$$\n",
|
|||
|
"\n",
|
|||
|
"* Po obliczeniu $\\nabla J(\\theta)$, **c-krotne** uruchomienie SGD, zastosowanie $\\mathrm{softmax}(X)$ do niezależnie uzyskanych klasyfikatorów binarnych."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Przyjmijmy: \n",
|
|||
|
"$$ \\Theta = (\\theta^{(1)},\\dots,\\theta^{(c)}) $$\n",
|
|||
|
"\n",
|
|||
|
"$$h_{\\Theta}(x) = \\left[ P(k|x,\\Theta) \\right]_{k=1,\\dots,c}$$\n",
|
|||
|
"\n",
|
|||
|
"$$\\delta(x,y) = \\left\\{\\begin{array}{cl} 1 & \\textrm{gdy } x=y \\\\ 0 & \\textrm{wpp.}\\end{array}\\right.$$\n",
|
|||
|
"\n",
|
|||
|
"* Wieloklasowa funkcja kosztu $J(\\Theta)$ (kategorialna entropia krzyżowa):\n",
|
|||
|
"$$ J(\\Theta) = -\\frac{1}{m}\\sum_{i=1}^{m}\\sum_{k=1}^{c} \\delta({y^{(i)},k}) \\log P(k|x^{(i)},\\Theta) $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Gradient $\\nabla J(\\Theta)$:\n",
|
|||
|
"$$ \\dfrac{\\partial J(\\Theta)}{\\partial \\Theta_{j,k}} = -\\frac{1}{m}\\sum_{i = 1}^{m} (\\delta({y^{(i)},k}) - P(k|x^{(i)}, \\Theta)) x^{(i)}_j \n",
|
|||
|
"$$\n",
|
|||
|
"\n",
|
|||
|
"* Liczymy wszystkie wagi jednym uruchomieniem SGD"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## Podsumowanie\n",
|
|||
|
"\n",
|
|||
|
    "* W przypadku jednowarstwowej sieci neuronowej wystarczy znać gradient funkcji kosztu.\n",
|
|||
|
"* Wtedy liczymy tak samo jak w przypadku regresji liniowej, logistycznej, wieloklasowej logistycznej itp.\n",
|
|||
|
" * Wymienione modele to szczególne przypadki jednowarstwowych sieci neuronowych.\n",
|
|||
|
"* Regresja liniowa i binarna regresja logistyczna to jeden neuron.\n",
|
|||
|
"* Wieloklasowa regresja logistyczna to tyle neuronów ile klas."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Funkcja aktywacji i funkcja kosztu są **dobierane do problemu**."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 5.2. Wielowarstwowe sieci neuronowe\n",
|
|||
|
"\n",
|
|||
|
"czyli _Artificial Neural Networks_ (ANN) lub _Multi-Layer Perceptrons_ (MLP)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img src=\"nn1.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Architektura sieci\n",
|
|||
|
"\n",
|
|||
|
"* Sieć neuronowa jako graf neuronów. \n",
|
|||
|
"* Organizacja sieci przez warstwy.\n",
|
|||
|
"* Najczęściej stosowane są sieci jednokierunkowe i gęste."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* $n$-warstwowa sieć neuronowa ma $n+1$ warstw (nie liczymy wejścia).\n",
|
|||
|
"* Rozmiary sieci określane poprzez liczbę neuronów lub parametrów."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Sieć neuronowa jednokierunkowa (_feedforward_)\n",
|
|||
|
"\n",
|
|||
|
"* Mając daną $n$-warstwową sieć neuronową oraz jej parametry $\\Theta^{(1)}, \\ldots, \\Theta^{(L)} $ oraz $\\beta^{(1)}, \\ldots, \\beta^{(L)} $ liczymy:<br/><br/> \n",
|
|||
|
"$$a^{(l)} = g^{(l)}\\left( a^{(l-1)} \\Theta^{(l)} + \\beta^{(l)} \\right). $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img src=\"nn2.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Funkcje $g^{(l)}$ to tzw. **funkcje aktywacji**.<br/>\n",
|
|||
|
    "Dla $l = 0$ przyjmujemy $a^{(0)} = \\mathrm{x}$ (wektor wierszowy cech) oraz $g^{(0)}(x) = x$ (identyczność)."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Parametry $\\Theta$ to wagi na połączeniach miedzy neuronami dwóch warstw.<br/>\n",
|
|||
|
"Rozmiar macierzy $\\Theta^{(l)}$, czyli macierzy wag na połączeniach warstw $a^{(l-1)}$ i $a^{(l)}$, to $\\dim(a^{(l-1)}) \\times \\dim(a^{(l)})$."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Parametry $\\beta$ zastępują tutaj dodawanie kolumny z jedynkami do macierzy cech.<br/>Macierz $\\beta^{(l)}$ ma rozmiar równy liczbie neuronów w odpowiedniej warstwie, czyli $1 \\times \\dim(a^{(l)})$."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* **Klasyfikacja**: dla ostatniej warstwy $L$ (o rozmiarze równym liczbie klas) przyjmuje się $g^{(L)}(x) = \\mathop{\\mathrm{softmax}}(x)$.\n",
|
|||
|
"* **Regresja**: pojedynczy neuron wyjściowy jak na obrazku. Funkcją aktywacji może wtedy być np. funkcja identycznościowa."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
    "* Pozostałe funkcje aktywacji najczęściej mają postać sigmoidy, np. sigmoidalna, tangens hiperboliczny.\n",
|
|||
|
"* Mogą mieć też inny kształt, np. ReLU, leaky ReLU, maxout."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 5.3. Metoda propagacji wstecznej – wprowadzenie"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
    "### Jak uczyć sieci neuronowe?"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* W poznanych do tej pory algorytmach (regresja liniowa, regresja logistyczna) do uczenia używaliśmy funkcji kosztu, jej gradientu oraz algorytmu gradientu prostego (GD/SGD)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
    "* Dla sieci neuronowych potrzebowalibyśmy również znaleźć gradient funkcji kosztu."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Co sprowadza się do bardziej ogólnego problemu:<br/>jak obliczyć gradient $\\nabla f(x)$ dla danej funkcji $f$ i wektora wejściowego $x$?"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Pochodna funkcji\n",
|
|||
|
"\n",
|
|||
|
"* **Pochodna** mierzy, jak szybko zmienia się wartość funkcji względem zmiany jej argumentów:\n",
|
|||
|
"\n",
|
|||
|
"$$ \\frac{d f(x)}{d x} = \\lim_{h \\to 0} \\frac{ f(x + h) - f(x) }{ h } $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Pochodna cząstkowa i gradient\n",
|
|||
|
"\n",
|
|||
|
"* **Pochodna cząstkowa** mierzy, jak szybko zmienia się wartość funkcji względem zmiany jej *pojedynczego argumentu*."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* **Gradient** to wektor pochodnych cząstkowych:\n",
|
|||
|
"\n",
|
|||
|
"$$ \\nabla f = \\left( \\frac{\\partial f}{\\partial x_1}, \\ldots, \\frac{\\partial f}{\\partial x_n} \\right) $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Gradient – przykłady\n",
|
|||
|
"\n",
|
|||
|
"$$ f(x_1, x_2) = x_1 + x_2 \\qquad \\to \\qquad \\frac{\\partial f}{\\partial x_1} = 1, \\quad \\frac{\\partial f}{\\partial x_2} = 1, \\quad \\nabla f = (1, 1) $$ "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"$$ f(x_1, x_2) = x_1 \\cdot x_2 \\qquad \\to \\qquad \\frac{\\partial f}{\\partial x_1} = x_2, \\quad \\frac{\\partial f}{\\partial x_2} = x_1, \\quad \\nabla f = (x_2, x_1) $$ "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
    "$$ f(x_1, x_2) = \\max(x_1, x_2) \\hskip{12em} \\\\\n",
|
|||
|
    "\\to \\qquad \\frac{\\partial f}{\\partial x_1} = \\mathbb{1}_{x_1 \\geq x_2}, \\quad \\frac{\\partial f}{\\partial x_2} = \\mathbb{1}_{x_2 \\geq x_1}, \\quad \\nabla f = (\\mathbb{1}_{x_1 \\geq x_2}, \\mathbb{1}_{x_2 \\geq x_1}) $$ "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Własności pochodnych cząstkowych\n",
|
|||
|
"\n",
|
|||
|
    "Jeżeli $f(x, y, z) = (x + y) \\, z$ oraz $x + y = q$, to:\n",
|
|||
|
"$$f = q z,\n",
|
|||
|
"\\quad \\frac{\\partial f}{\\partial q} = z,\n",
|
|||
|
"\\quad \\frac{\\partial f}{\\partial z} = q,\n",
|
|||
|
"\\quad \\frac{\\partial q}{\\partial x} = 1,\n",
|
|||
|
"\\quad \\frac{\\partial q}{\\partial y} = 1 $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Reguła łańcuchowa\n",
|
|||
|
"\n",
|
|||
|
"$$ \\frac{\\partial f}{\\partial x} = \\frac{\\partial f}{\\partial q} \\, \\frac{\\partial q}{\\partial x},\n",
|
|||
|
"\\quad \\frac{\\partial f}{\\partial y} = \\frac{\\partial f}{\\partial q} \\, \\frac{\\partial q}{\\partial y} $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Propagacja wsteczna – prosty przykład"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Dla ustalonego wejścia\n",
|
|||
|
"x = -2; y = 5; z = -4"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"(3, -12)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Krok w przód\n",
|
|||
|
"q = x + y\n",
|
|||
|
"f = q * z\n",
|
|||
|
"print(q, f)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[-4, -4, 3]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Propagacja wsteczna dla f = q * z\n",
|
|||
|
"dz = q\n",
|
|||
|
"dq = z\n",
|
|||
|
"# Propagacja wsteczna dla q = x + y\n",
|
|||
|
"dx = 1 * dq # z reguły łańcuchowej\n",
|
|||
|
"dy = 1 * dq # z reguły łańcuchowej\n",
|
|||
|
"print([dx, dy, dz])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img src=\"exp1.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Właśnie tak wygląda obliczanie pochodnych metodą propagacji wstecznej!"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Spróbujmy czegoś bardziej skomplikowanego:<br/>metodą propagacji wstecznej obliczmy pochodną funkcji sigmoidalnej."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Propagacja wsteczna – funkcja sigmoidalna"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Funkcja sigmoidalna:\n",
|
|||
|
"\n",
|
|||
|
"$$f(\\theta,x) = \\frac{1}{1+e^{-(\\theta_0 x_0 + \\theta_1 x_1 + \\theta_2)}}$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img src=\"exp2.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[0.3932238664829637, -0.5898357997244456]\n",
|
|||
|
"[-0.19661193324148185, -0.3932238664829637, 0.19661193324148185]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import math\n",
|
|||
|
"\n",
|
|||
|
"# Losowe wagi i dane\n",
|
|||
|
"w = [2,-3,-3]\n",
|
|||
|
"x = [-1, -2]\n",
|
|||
|
"\n",
|
|||
|
"# Krok w przód\n",
|
|||
|
"dot = w[0]*x[0] + w[1]*x[1] + w[2]\n",
|
|||
|
"f = 1.0 / (1 + math.exp(-dot)) # funkcja sigmoidalna\n",
|
|||
|
"\n",
|
|||
|
"# Krok w tył\n",
|
|||
|
"ddot = (1 - f) * f # pochodna funkcji sigmoidalnej\n",
|
|||
|
"dx = [w[0] * ddot, w[1] * ddot]\n",
|
|||
|
"dw = [x[0] * ddot, x[1] * ddot, 1.0 * ddot]\n",
|
|||
|
"\n",
|
|||
|
"print(dx)\n",
|
|||
|
"print(dw)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Obliczanie gradientów – podsumowanie\n",
|
|||
|
"\n",
|
|||
|
"* Gradient $f$ dla $x$ mówi jak zmieni się całe wyrażenie przy zmianie wartości $x$.\n",
|
|||
|
"* Gradienty łączymy korzystając z **reguły łańcuchowej**.\n",
|
|||
|
"* W kroku wstecz gradienty informują, które części grafu powinny być zwiększone lub zmniejszone (i z jaką siłą), aby zwiększyć wartość na wyjściu.\n",
|
|||
|
"* W kontekście implementacji chcemy dzielić funkcję $f$ na części, dla których można łatwo obliczyć gradienty."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 5.4. Uczenie wielowarstwowych sieci neuronowych metodą propagacji wstecznej"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Mając algorytm SGD oraz gradienty wszystkich wag, moglibyśmy trenować każdą sieć."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Niech:\n",
|
|||
|
"$$\\Theta = (\\Theta^{(1)},\\Theta^{(2)},\\Theta^{(3)},\\beta^{(1)},\\beta^{(2)},\\beta^{(3)})$$\n",
|
|||
|
"\n",
|
|||
|
"* Funkcja sieci neuronowej z grafiki:\n",
|
|||
|
"\n",
|
|||
|
"$$\\small h_\\Theta(x) = \\tanh(\\tanh(\\tanh(x\\Theta^{(1)}+\\beta^{(1)})\\Theta^{(2)} + \\beta^{(2)})\\Theta^{(3)} + \\beta^{(3)})$$\n",
|
|||
|
"* Funkcja kosztu dla regresji:\n",
|
|||
|
"$$J(\\Theta) = \\dfrac{1}{2m} \\sum_{i=1}^{m} (h_\\Theta(x^{(i)})- y^{(i)})^2 $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Jak obliczymy gradienty?\n",
|
|||
|
"\n",
|
|||
|
"$$\\nabla_{\\Theta^{(l)}} J(\\Theta) = ? \\quad \\nabla_{\\beta^{(l)}} J(\\Theta) = ?$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### W kierunku propagacji wstecznej\n",
|
|||
|
"\n",
|
|||
|
"* Pewna (niewielka) zmiana wagi $\\Delta z^l_j$ dla $j$-ego neuronu w warstwie $l$ pociąga za sobą (niewielką) zmianę kosztu: \n",
|
|||
|
"\n",
|
|||
|
"$$\\frac{\\partial J(\\Theta)}{\\partial z^{l}_j} \\Delta z^{l}_j$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Jeżeli $\\frac{\\partial J(\\Theta)}{\\partial z^{l}_j}$ jest duża, $\\Delta z^l_j$ ze znakiem przeciwnym zredukuje koszt.\n",
|
|||
|
"* Jeżeli $\\frac{\\partial J(\\Theta)}{\\partial z^l_j}$ jest bliska zeru, koszt nie będzie mocno poprawiony."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Definiujemy błąd $\\delta^l_j$ neuronu $j$ w warstwie $l$: \n",
|
|||
|
"\n",
|
|||
|
"$$\\delta^l_j \\equiv \\dfrac{\\partial J(\\Theta)}{\\partial z^l_j}$$ \n",
|
|||
|
"$$\\delta^l \\equiv \\nabla_{z^l} J(\\Theta) \\textrm{ (zapis wektorowy)} $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Podstawowe równania propagacji wstecznej\n",
|
|||
|
"\n",
|
|||
|
"$$\n",
|
|||
|
"\\begin{array}{ccll}\n",
|
|||
|
"\\delta^L & = & \\nabla_{a^L}J(\\Theta) \\odot {(g^{L})}^{\\prime}(z^L) & (BP1) \\\\[2mm]\n",
|
|||
|
"\\delta^{l} & = & ((\\Theta^{l+1})^T \\delta^{l+1}) \\odot {{(g^{l})}^{\\prime}}(z^{l}) & (BP2)\\\\[2mm]\n",
|
|||
|
"\\nabla_{\\beta^l} J(\\Theta) & = & \\delta^l & (BP3)\\\\[2mm]\n",
|
|||
|
    "\\nabla_{\\Theta^l} J(\\Theta) & = & (a^{l-1})^T \\delta^l & (BP4)\\\\\n",
|
|||
|
"\\end{array}\n",
|
|||
|
"$$\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Algorytm propagacji wstecznej"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Dla jednego przykładu (x,y):\n",
|
|||
|
"\n",
|
|||
|
"1. **Wejście**: Ustaw aktywacje w warstwie cech $a^{(0)}=x$ \n",
|
|||
|
"2. **Feedforward:** dla $l=1,\\dots,L$ oblicz \n",
|
|||
|
"$$z^{(l)} = a^{(l-1)} \\Theta^{(l)} + \\beta^{(l)} \\textrm{ oraz } a^{(l)}=g^{(l)}(z^{(l)})$$\n",
|
|||
|
"3. **Błąd wyjścia $\\delta^{(L)}$:** oblicz wektor $$\\delta^{(L)}= \\nabla_{a^{(L)}}J(\\Theta) \\odot {g^{\\prime}}^{(L)}(z^{(L)})$$\n",
|
|||
|
"4. **Propagacja wsteczna błędu:** dla $l = L-1,L-2,\\dots,1$ oblicz $$\\delta^{(l)} = \\delta^{(l+1)}(\\Theta^{(l+1)})^T \\odot {g^{\\prime}}^{(l)}(z^{(l)})$$\n",
|
|||
|
"5. **Gradienty:** \n",
|
|||
|
" * $\\dfrac{\\partial}{\\partial \\Theta_{ij}^{(l)}} J(\\Theta) = a_i^{(l-1)}\\delta_j^{(l)} \\textrm{ oraz } \\dfrac{\\partial}{\\partial \\beta_{j}^{(l)}} J(\\Theta) = \\delta_j^{(l)}$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"W naszym przykładzie:\n",
|
|||
|
"\n",
|
|||
|
"$$\\small J(\\Theta) = \\frac{1}{2}(a^{(L)} - y)^2 $$\n",
|
|||
|
"$$\\small \\dfrac{\\partial}{\\partial a^{(L)}} J(\\Theta) = a^{(L)} - y$$\n",
|
|||
|
"\n",
|
|||
|
"$$\\small \\tanh^{\\prime}(x) = 1 - \\tanh^2(x)$$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"<img src=\"nn3.png\" />"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Algorytm SGD z propagacją wsteczną\n",
|
|||
|
"\n",
|
|||
|
"Pojedyncza iteracja:\n",
|
|||
|
"* Dla parametrów $\\Theta = (\\Theta^{(1)},\\ldots,\\Theta^{(L)})$ utwórz pomocnicze macierze zerowe $\\Delta = (\\Delta^{(1)},\\ldots,\\Delta^{(L)})$ o takich samych wymiarach (dla uproszczenia opuszczono wagi $\\beta$).\n",
|
|||
|
"* Dla $m$ przykładów we wsadzie (_batch_), $i = 1,\\ldots,m$:\n",
|
|||
|
    "    * Wykonaj algorytm propagacji wstecznej dla przykładu $(x^{(i)}, y^{(i)})$ i przechowaj gradienty $\\nabla_{\\Theta}J^{(i)}(\\Theta)$ dla tego przykładu;\n",
|
|||
|
" * $\\Delta := \\Delta + \\dfrac{1}{m}\\nabla_{\\Theta}J^{(i)}(\\Theta)$\n",
|
|||
|
"* Wykonaj aktualizację wag: $\\Theta := \\Theta - \\alpha \\Delta$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Propagacja wsteczna – podsumowanie\n",
|
|||
|
"\n",
|
|||
|
"* Algorytm pierwszy raz wprowadzony w latach 70. XX w.\n",
|
|||
|
"* W 1986 David Rumelhart, Geoffrey Hinton i Ronald Williams pokazali, że jest znacznie szybszy od wcześniejszych metod.\n",
|
|||
|
"* Obecnie najpopularniejszy algorytm uczenia sieci neuronowych."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 5.5. Implementacja sieci neuronowych"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style>\n",
|
|||
|
" .dataframe thead tr:only-child th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: left;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>łod.dł.</th>\n",
|
|||
|
" <th>łod.sz.</th>\n",
|
|||
|
" <th>pł.dł.</th>\n",
|
|||
|
" <th>pł.sz.</th>\n",
|
|||
|
" <th>Iris setosa?</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>5.2</td>\n",
|
|||
|
" <td>3.4</td>\n",
|
|||
|
" <td>1.4</td>\n",
|
|||
|
" <td>0.2</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>5.1</td>\n",
|
|||
|
" <td>3.7</td>\n",
|
|||
|
" <td>1.5</td>\n",
|
|||
|
" <td>0.4</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>6.7</td>\n",
|
|||
|
" <td>3.1</td>\n",
|
|||
|
" <td>5.6</td>\n",
|
|||
|
" <td>2.4</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>6.5</td>\n",
|
|||
|
" <td>3.2</td>\n",
|
|||
|
" <td>5.1</td>\n",
|
|||
|
" <td>2.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>4.9</td>\n",
|
|||
|
" <td>2.5</td>\n",
|
|||
|
" <td>4.5</td>\n",
|
|||
|
" <td>1.7</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>2.7</td>\n",
|
|||
|
" <td>5.1</td>\n",
|
|||
|
" <td>1.6</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" łod.dł. łod.sz. pł.dł. pł.sz. Iris setosa?\n",
|
|||
|
"0 5.2 3.4 1.4 0.2 1.0\n",
|
|||
|
"1 5.1 3.7 1.5 0.4 1.0\n",
|
|||
|
"2 6.7 3.1 5.6 2.4 0.0\n",
|
|||
|
"3 6.5 3.2 5.1 2.0 0.0\n",
|
|||
|
"4 4.9 2.5 4.5 1.7 0.0\n",
|
|||
|
"5 6.0 2.7 5.1 1.6 0.0"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas\n",
|
|||
|
"src_cols = ['łod.dł.', 'łod.sz.', 'pł.dł.', 'pł.sz.', 'Gatunek']\n",
|
|||
|
"trg_cols = ['łod.dł.', 'łod.sz.', 'pł.dł.', 'pł.sz.', 'Iris setosa?']\n",
|
|||
|
"data = (\n",
|
|||
|
" pandas.read_csv('iris.csv', usecols=src_cols)\n",
|
|||
|
" .apply(lambda x: [x[0], x[1], x[2], x[3], 1 if x[4] == 'Iris-setosa' else 0], axis=1))\n",
|
|||
|
"data.columns = trg_cols\n",
|
|||
|
"data[:6]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[[ 1. 5.2 3.4 1.4 0.2]\n",
|
|||
|
" [ 1. 5.1 3.7 1.5 0.4]\n",
|
|||
|
" [ 1. 6.7 3.1 5.6 2.4]\n",
|
|||
|
" [ 1. 6.5 3.2 5.1 2. ]\n",
|
|||
|
" [ 1. 4.9 2.5 4.5 1.7]\n",
|
|||
|
" [ 1. 6. 2.7 5.1 1.6]]\n",
|
|||
|
"[[ 1.]\n",
|
|||
|
" [ 1.]\n",
|
|||
|
" [ 0.]\n",
|
|||
|
" [ 0.]\n",
|
|||
|
" [ 0.]\n",
|
|||
|
" [ 0.]]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"m, n_plus_1 = data.values.shape\n",
|
|||
|
"n = n_plus_1 - 1\n",
|
|||
|
"Xn = data.values[:, 0:n].reshape(m, n)\n",
|
|||
|
"X = np.matrix(np.concatenate((np.ones((m, 1)), Xn), axis=1)).reshape(m, n_plus_1)\n",
|
|||
|
"Y = np.matrix(data.values[:, n]).reshape(m, 1)\n",
|
|||
|
"\n",
|
|||
|
"print(X[:6])\n",
|
|||
|
"print(Y[:6])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Using TensorFlow backend.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Epoch 1/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 2.0678 - acc: 0.6667 \n",
|
|||
|
"Epoch 2/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.9711 - acc: 0.6667 \n",
|
|||
|
"Epoch 3/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.8811 - acc: 0.6667 \n",
|
|||
|
"Epoch 4/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.7793 - acc: 0.6667 \n",
|
|||
|
"Epoch 5/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.6948 - acc: 0.6667 \n",
|
|||
|
"Epoch 6/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.5993 - acc: 0.6667 \n",
|
|||
|
"Epoch 7/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.5162 - acc: 0.6667 \n",
|
|||
|
"Epoch 8/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.4308 - acc: 0.6667 \n",
|
|||
|
"Epoch 9/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.3487 - acc: 0.6667 \n",
|
|||
|
"Epoch 10/10\n",
|
|||
|
"150/150 [==============================] - 0s - loss: 1.2676 - acc: 0.6667 \n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<keras.callbacks.History at 0x7f87f40aa150>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from keras.models import Sequential\n",
|
|||
|
"from keras.layers import Dense\n",
|
|||
|
"\n",
|
|||
|
"model = Sequential()\n",
|
|||
|
"model.add(Dense(3, input_dim=5))\n",
|
|||
|
"model.add(Dense(3))\n",
|
|||
|
"model.add(Dense(1, activation='sigmoid'))\n",
|
|||
|
"\n",
|
|||
|
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
|
|||
|
"\n",
|
|||
|
"model.fit(X, Y)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"0.8209257125854492"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"model.predict(np.array([1.0, 3.0, 1.0, 2.0, 4.0]).reshape(-1, 5)).tolist()[0][0]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 5.6. Funkcje aktywacji"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Każda funkcja aktywacji ma swoje zalety i wady.\n",
|
|||
|
"* Różne rodzaje funkcji aktywacji nadają się do różnych zastosowań."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import math\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import random\n",
|
|||
|
"\n",
|
|||
|
"import keras\n",
|
|||
|
"from keras.datasets import mnist\n",
|
|||
|
"from keras.models import Sequential\n",
|
|||
|
"from keras.layers import Dense, Dropout, SimpleRNN, LSTM\n",
|
|||
|
"from keras.optimizers import Adagrad, Adam, RMSprop, SGD\n",
|
|||
|
"\n",
|
|||
|
"from IPython.display import YouTubeVideo"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def plot(fun):\n",
|
|||
|
" x = np.arange(-3.0, 3.0, 0.01)\n",
|
|||
|
" y = [fun(x_i) for x_i in x]\n",
|
|||
|
" fig = plt.figure(figsize=(14, 7))\n",
|
|||
|
" ax = fig.add_subplot(111)\n",
|
|||
|
" fig.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9)\n",
|
|||
|
" ax.set_xlim(-3.0, 3.0)\n",
|
|||
|
" ax.set_ylim(-1.5, 1.5)\n",
|
|||
|
" ax.grid()\n",
|
|||
|
" ax.plot(x, y)\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Funkcja logistyczna"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"$$ g(x) = \\frac{1}{1 + e^{-x}} $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Przyjmuje wartości z przedziału $(0, 1)$."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"#### Funkcja logistyczna – wykres"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAG9CAYAAACRcQ4FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XmUnOld2Pvvr/d9qVa39m5JI41m\npNlHs4B9bXmwPXZibIgXILZjE+MBEgiG41zswAlmyQVuiOEe35DDEILZTojNvcEYfA0YW4MDnn3x\njMazat+l3vf1uX9UdUvqac20Rl2qt7u/n3PqVNVb71vz6Ph1q7963/d5I6WEJEmSJKm0yko9AEmS\nJEmScSZJkiRJmWCcSZIkSVIGGGeSJEmSlAHGmSRJkiRlgHEmSZIkSRlQtDiLiE0R8bmI+FZEjERE\niogti9z2UGH9+Y/vK9Z4JUmSJKmUKor43duBDwCPAd8E3n6Z2/818Jl5y56/8mFJkiRJUvYUM87+\nPqW0FiAifoTLj7NzKaUHl35YkiRJkpQ9RTutMaU0U6zvliRJkqSVJssTgnxv4Vq18Yh40OvNJEmS\nJK1kWY2zLwM/CdwLfBAYA/5nRHyopKOSJEmSpCKJlFLx/yP5a85+F9iaUjr0OrYvBx4E1qWUNl9i\nnfuA+wBqampu7+zsfP0D1oo0MzNDWVlW/z1CpeS+oYW4X+hS3De0EPcLLeSFF144l1JqX+z6xZwQ\nZMmklKYj4ovAr0fE+pTSyQXWuR+4H2Dnzp3p+eed2FEX27dvH3v37i31MJRB7htaiPuFLsV9Qwtx\nv9BCIuLw5ay/HPO++If6JEmSJOkqWxZxFhEVwA8AR1JKp0o9HkmSJElaakU9rTEi3ld4eXvh+Z0R\ncRY4m1J6oLDOFPAHKaWPFd7/EPAe4CvAUWAt8K+B24AfKuZ4JUmSJKlUin3N2Rfnvf/twvMDwN7C\n6/LCY9ZBoAP4j0AOGAYeBd6RUvrroo1UkiRJkkqoqHGWUorLXSel9CBwT9EGJUmSJEkZtCyuOZMk\nSZKklc44kyRJkqQMMM4kSZIkKQOMM0mSJEnKAONMkiRJkjLAOJMkSZKkDDDOJEmSJCkDjDNJkiRJ\nygDjTJIkSZIywDiTJEmSpAwwziRJkiQpA4wzSZIkScoA40ySJEmSMsA4kyRJkqQMMM4kSZIkKQOM\nM0mSJEnKAONMkiRJkjLAOJMkSZKkDDDOJEmSJCkDjDNJkiRJygDjTJIkSZIywDiTJEmSpAwwziRJ\nkiQpA4wzSZIkScoA40ySJEmSMsA4kyRJkqQMMM4kSZIkKQOMM0mSJEnKAONMkiRJkjLAOJMkSZKk\nDDDOJEmSJCkDjDNJkiRJygDjTJIkSZIywDiTJEmSpAwwziRJkiQpA4wzSZIkScoA40ySJEmSMsA4\nkyRJkqQMMM4kSZIkKQOMM0mSJEnKAONMkiRJkjLAOJMkSZKkDDDOJEmSJCkDjDNJkiRJygDjTJIk\nSZIywDiTJEmSpAwwziRJkiQpA4wzSZIkScoA40ySJEmSMsA4kyRJkqQMMM4kSZIkKQOMM0mSJEnK\nAONMkiRJkjLAOJMkSZKkDChanEXEpoj4XER8KyJGIiJFxJZFblsWEZ+OiEMRMRYRT0XEe4s1VkmS\nJEkqtWIeOdsOfADoBb55mdv+MvAZ4P8G3gk8CHwxIv7JUg5QkiRJkrKioojf/fcppbUAEfEjwNsX\ns1FEdACfBH4tpfQbhcXfiIjtwK8BXynGYCVJkiSplIp25CylNPM6N70XqAL+eN7yPwZujIitVzQw\nSZIkScqgLE4IshsYB16at3x/4XnX1R2OJEmSJBVfFuMsB/SllNK85T0XfC5JkiRJK0oxrzm7qiLi\nPuA+gPb2dvbt21faASlzhoaG3C+0IPcNLcT9
QpfivqGFuF9oKWQxznqBloiIeUfPZo+Y9SywDSml\n+4H7AXbu3Jn27t1b1EFq+dm3bx/uF1qI+4YW4n6hS3Hf0ELcL7QUsnha436gGrhm3vLZa82evbrD\nkSRJkqTiy2KcfRWYBD44b/mHgGdSSgev/pAkSZIkqbiKelpjRLyv8PL2wvM7I+IscDal9EBhnSng\nD1JKHwNIKZ2JiM8Cn46IQeBx4AeAe4B3F3O8kiRJklQqxb7m7Ivz3v924fkBYG/hdXnhcaGfA4aA\nnwLWAc8DH0gp/WVxhilJkiRJr256JjEyMcXw+DTDE1OMjE8zND6VXzYxzfD4FMPjU4xMTFNdcfkn\nKRY1zlJK8XrWSSlNA79SeEiSJEnS6zIzkxiemGJofIqhsSkGC8/D4+dfD43nH4Ozr8cm596PzEbX\nxBRjkzOL/u9uztVe9lizOFujJEmSJDE9kxgam2JgbJL+0UkGRicZGJtkYHSK/tFJBscmXyWwzi9b\njNrKchpqKmisrqChpoKG6go25+poqK6grqqc+uoK6qsqqK8up67wXF9VQV3hub46v05dVX79yvIy\n4mcv789rnEmSJEkqipQSwxPTr4iq2ff511OFzwrvx6byn4/mw+vVREBD9cVB1VRbycaWWhouWHbR\n63kB1lhdSX11ORXlpZ8r0TiTJEmS9JrGp6bpG5mkd2SCvpFJ+kYm6C287y889xaW59ebpH90gsnp\n9Krf21BdQVNNPqpmw+r69Y0011bSVJNfln9dWKemkua6/Pv6qgrKyl7zSqplwziTJEmSVpmJqRl6\nhifoHh6ne2iCnuEJzg2Nn4+v0UJ8DRdia3SSkYnpS35fVUUZrXWVtNZV0VJXyfaOBlrqKmmpq6Jl\nNq7mIquSptoKmmoqaaypyMQRq6wwziRJkqRlbmJqht6RfGD1DE/QPTRB9/AEPYX46h6eoHv2s+EJ\nBscWPl2wLMgHVV0lLbWVrG+u4fr1TbTWVc7FVmtdFa11+aNXrYX3tVXzJ1/X62GcSZIkSRk0NjnN\nuaFxzg4WHkPjnBnIP58bHC/EVz7ILhVb5WVBrr6Ktvoq2hqquLG1Jf+6vopcw+zy6rl1mmoqV9Rp\ngsuNcSZJkiRdJTMzid6RCc5eEF1nBi8IsLkIG2NggeCKgFxdFWsaqmlrqOKGjc350CrEl7G1vBln\nkiRJ0hVKKdE/OsnpgTFO9Y9x6oLn0/1jcwF2bmicqZlXTpBRW1lOR1M17Q3VXLu2gTdc00Z7YzXt\njdV0NNbMvc7VV1HpNVorlnEmSZIkvYqp6RnODo3nY2s2vArRdWpgjNMD4xzvHWHir//mFdvm6qtY\n21TD2qZqrl/fmI+shmraC8HVUYiu+mp/LZdxJkmSpFUspcS5oQlO9I1yom+U432jnOgby7/vH+VU\n/xjnhsaZf7CrsjxY21TDuqYadm1o4tr6cfbs3sHa5vyydU01dDRVU1PpRBlaPONMkiRJK9bY5PQF\n0TXK8dnwmn30jzExNXPRNnVV5WxoqWV9cw3XrWtkXVPNXHStbaphXXMNubqqi67l2rdvH3vftO1q\n//G0whhnkiRJWraGx6c42jvC0Z5RjvSMcKx3pBBe+QjrHp64aP0IWNtYw4aWGm7Y2My9u9exoaW2\n8KhhY0stzbWVRDiJhq4+40ySJEmZNTE1w4m+0bkAyz8XHr2j9MyLr7qqcja15mPrxk3NbCxE14bm\n/LJ1zTVOqKHMMs4kSZJUMiklzg6Oc6RnhKO9IxzpPh9gx3pHOdk/etH1XhVlwcbWWjpzddy7oZnN\nuVo2t9bRmatjc66O1jqPemn5Ms4kSZJUVCklTg+Mc6h7mEPnhjnUPcLh7mEOnhvmcPcIo5PTF63f\n0VhNZ66OO7fm2Nxay6bc+fha11RDufft0gplnEmSJOmKzcwkTg+OzQXXbIjNvh6bPD/pRmV5sDlX\nx5a2er7r
mja2tNXT2VbH5tY6NrXWOsOhVi3jTJIkSYvWNzLBy2eHePnsMC+fHeLg2XyAHe65OMCq\nysvYnKtlS1s9b9i+hi1
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f8808515990>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot(lambda x: 1 / (1 + math.exp(-x)))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Tangens hiperboliczny"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"$$ g(x) = \\tanh x = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Przyjmuje wartości z przedziału $(-1, 1)$.\n",
|
|||
|
"* Powstaje z funkcji logistycznej przez przeskalowanie i przesunięcie."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"#### Tangens hiperboliczny – wykres"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAG9CAYAAACRcQ4FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xd8W/W9//H3V7LkPRM7znI2zh5k\nshM2ZZWy9w4tpXTclu7bUri/Cx20vW25lwTCCJAWKFBWKaMYAmRAErL33rbjeFvW+v7+kDJxwEms\nnGP59Xw89JB0dI78SXsa59Wjc2SstQIAAAAAOMvj9AAAAAAAAOIMAAAAAFyBOAMAAAAAFyDOAAAA\nAMAFiDMAAAAAcAHiDAAAAABcIGFxZozpYYz5kzFmljGm0RhjjTG9W7nthvj6B9++mqh5AQAAAMBJ\nKQl87/6SrpA0T9JMSWcf5vb/kvTLg5atPPqxAAAAAMB9EhlnH1hru0iSMeY2HX6cVVprZ7f9WAAA\nAADgPgn7WKO1Npqo9wYAAACAZOPmC4JcGD9XrdkYM5vzzQAAAAAkM7fG2auSviXpHEnXSgpIeskY\nc52jUwEAAABAghhrbeJ/SOycs6mS+lhrNxzB9l5JsyUVW2t7HmKdyZImS1JaWtrokpKSIx8YSSka\njcrjcev/HwEnsW+gJewXOBT2DbSE/QItWbVqVaW1trC16yfygiBtxlobMcY8L+lBY0xXa+32FtaZ\nImmKJJWWltqVK7mwIw5UVlamiRMnOj0GXIh9Ay1hv8ChsG+gJewXaIkxZuPhrN8e8z7xh/oAAAAA\n4BhrF3FmjEmRdKWkTdbaHU7PAwAAAABtLaEfazTGXBZ/ODp+f54xpkJShbX2/fg6YUlPWmtvjT+/\nWtLFkt6QtFlSF0nflHS8pKsTOS8AAAAAOCXR55w9f9Dzh+P370uaGH/sjd/2WC+pSNJvJBVIapD0\nqaRzrbX/StikAAAAAOCghMaZtdYc7jrW2tmSTk/YUAAAAADgQu3inDMAAAAASHbEGQAAAAC4AHEG\nAAAAAC5AnAEAAACACxBnAAAAAOACxBkAAAAAuABxBgAAAAAuQJwBAAAAgAsQZwAAAADgAsQZAAAA\nALgAcQYAAAAALkCcAQAAAIALEGcAAAAA4ALEGQAAAAC4AHEGAAAAAC5AnAEAAACACxBnAAAAAOAC\nxBkAAAAAuABxBgAAAAAuQJwBAAAAgAsQZwAAAADgAsQZAAAAALgAcQYAAAAALkCcAQAAAIALEGcA\nAAAA4ALEGQAAAAC4AHEGAAAAAC5AnAEAAACACxBnAAAAAOACxBkAAAAAuABxBgAAAAAuQJwBAAAA\ngAsQZwAAAADgAsQZAAAAALgAcQYAAAAALkCcAQAAAIALEGcAAAAA4ALEGQAAAAC4AHEGAAAAAC5A\nnAEAAACACxBnAAAAAOACxBkAAAAAuABxBgAAAAAuQJwBAAAAgAsQZwAAAADgAsQZAAAAALgAcQYA\nAAAALkCcAQAAAIALEGcAAAAA4ALEGQAAAAC4AHEGAAAAAC5AnAEAAACACxBnAAAAAOACxBkAAAAA\nuEDC4swY08MY8ydjzCxjTKMxxhpjerdyW48x5sfGmA3GmIAxZqEx5tJEzQoAAAAATkvkkbP+kq6Q\ntFvSzMPc9j5Jv5T0Z0nnSZot6XljzFfackAAAAAAcIuUBL73B9baLpJkjLlN0tmt2cgYUyTp+5Ie\nsNb+Nr74PWNMf0kPSHojEcMCAAAAgJMSduTMWhs9wk3PkeSX9PRBy5+WNMwY0+eoBgMAAAAAF3Lj\nBUGGSGqWtOag5Uvj94OP7TgAAAAAkHiJ/FjjkSqQVG2ttQctr9rvdQAAAAA4KtGoVcRaRaJW0T33\nUX1u2QGvW6tIVHsfhw9+Pf6efu/hHwdzY5wd
EWPMZEmTJamwsFBlZWXODgTXqa+vZ79Ai9g30BL2\nCxwK+wZa0lH3i1icKHazUji63/P441BUilgp9CWvReKvRWwsjqJWsQiy+5ZFooot33OL2n3PD1o3\nesDzWIiFrQ5434OPBrWlTmnmsLdxY5ztlpRnjDEHHT3bc8SsqoVtZK2dImmKJJWWltqJEycmdEi0\nP2VlZWK/QEvYN9AS9gscCvsGWuKW/SIYjqoxGFZjMKLGYFhNwaiaQhEF9tzCUQVCETWHIgqEovFl\n+z0ORRUIx15v2n+d+ONgJKpgOKpQ/D4cbdu88RgpxetRisfEbi099hp5vR75vEZ+j5HP45E3vvzA\n9fbf1iglvp7Pa+T1xLb3mNjrHo+R12PkNfHHRvLuWW723e+/zOtRbPv4+xywvccoLcWr4fce3p/f\njXG2VFKqpH468LyzPeeaLTvmEwEAAAAJEI5EVRcIqzYQOuC+PhBWYzCshmBEjc3x+2BYDc0H3R/0\neihy+LEUCwmP0nxepfm8SvV5lJbiVZovtiw33Rd7nBJ7ze/1yOf1yJ8Su/m8HqWm7Fu29zWvR/4U\nI7/XGwup/db1H7T+nu29nsM/2pRM3Bhnb0oKSbpW0v6teZ2kJdba9Y5MBQAAABzEWqv65rAqGqNa\ntKVauxtDqm4MqroxpLq9wbUvuvYua4rdN4Uirfo5mX6vMlJTYvf+FGWmepWX4Vf3/PjzFl5P96co\n3bcvsvYPrlSfJ/6aV74jODcKiZHQODPGXBZ/ODp+f54xpkJShbX2/fg6YUlPWmtvlSRrbbkx5iFJ\nPzbG1EmaL+lKSadLuiiR8wIAAKBja2gOq7K+OX4LandDcG9w7W7c//G+CNv70b4PPvrc+6WmeJSd\n5lNOWoqy01KUk+5T19w0Zaf6lJ2WEnstPXafvWedNJ+yUlOUmRqLrLQUrzwd/IhSR5HoI2fPH/T8\n4fj9+5Imxh9747f9/VRSvaRvSyqWtFLSFdba1xIzJgAAAJJVUzCi8rqAKuubVVEXVEV9syrrmuPP\n94VYZX2zGoMtH8nyez3Ky/ApP8OvvAyf+hdmKT/Tp7wMv/IzfNq5aZ0mHD9cBfFluemx2EpNOfif\nucChJTTOrLVfmvgtrWOtjUi6P34DAAAAPsdaq9pAWDtrA9peE9COmqb4fez5nuU1TaEWty/I9Ktz\nll+ds1I1qiRPnbNS4ze/CrNjj/cEWYbfK2MO/U/bsrLNmji4S6L+qOgg3HjOGQAAACBrrSrqm7W5\nqklbdjdqy+4mba6K3W+radKOmsDnjnQZI3XOSlVxTpp6FmRoXJ8CdclJU5ectL0hVpSdqoJMv1I4\n1wouQ5wBAADAMYFQRBt3NWp9Zb027mrU5t2NB8RYczh6wPqds/zqkZ+hQcU5mlRapOKcNBXnpqlr\nbuy+KDtN/hSiC+0TcQYAAICECkei2lrdpHWVDVpf0aD1lftu22qatP832+am+9QjP10DirJ1+sAi\n9SzIUI/8dPXMz1CP/Ayl+zmHC8mLOAMAAECbCIajWl/ZoFU76/be1pTXa1NV4wHfv5WdlqK+nTM1\ntne++nTuqT6FmerTKVMlnTKUm+5z8E8AOIs4AwAAwGGJRq027GrQyh11WrWzfm+Ira9s2HtZeY+R\nenfO1ICiLJ09pFh9Omeqb+dM9e6cqU6Z/i+8uAbQURFnAAAAOKRQJKrVO+u1ZFuNlm2r1dL4fUP8\nQhzGSCUFGRpQlK2zBndRaXG2BhRlq29hptJ8fAQROBzEGQAAACTFQmzF9jp9tqVaS7fWaMm2Gq3a\nUa9gJHZRjgy/V4O75uiy0T00pFuuBnaNhRjngQFtgzgDAADooHbUBLRg024t2FytBZt2a9GWmr1X\nR8zP8GlIt1zdfFJvDemeqyHdctS7U6a8Hj6OCCQKcQYAANABBMNRLd5ao3kbq7RgU7U+21yt7TUB\nSZI/xaOh
3XJ03YReGlWSp5E989Q9L53zwoBjjDgDAABIQoFQRJ9trtacdVWau2GX5m3crUAodlSs\nJP7lzKN65mlUSb4Gdc3
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f87f1c714d0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot(lambda x: math.tanh(x))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### ReLU (_Rectified Linear Unit_)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"$$ g(x) = \\max(0, x) $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"#### ReLU – zalety\n",
|
|||
|
"* Mniej podatna na problem zanikającego gradientu (_vanishing gradient_) niż funkcje sigmoidalne, dzięki czemu SGD jest szybciej zbieżna.\n",
|
|||
|
"* Prostsze obliczanie gradientu.\n",
|
|||
|
"* Dzięki zerowaniu ujemnych wartości, wygasza neurony, „rozrzedzając” sieć (_sparsity_), co przyspiesza obliczenia."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"#### ReLU – wady\n",
|
|||
|
"* Dla dużych wartości gradient może „eksplodować”.\n",
|
|||
|
"* „Wygaszanie” neuronów."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"#### ReLU – wykres"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAG9CAYAAACRcQ4FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAHy9JREFUeJzt3XuUrXdd3/HPNzmQcLFK9BBcSm4E\njyTeKrELq9VD5NpioosAVcBohSPWC8iiFYSlUVFI1UgXlS4SLyC4xEAVCKRYFCZa4RhuShNKQmgS\nkzZZBBMgJwm5cH79Y++RyWTPycyZ/cz+7b1fr7Vmzcyzn2efb+CXybzPs59nV2stAAAAzNYRsx4A\nAAAAcQYAANAFcQYAANABcQYAANABcQYAANABcQYAANCBweKsqr6+ql5bVR+sqturqlXVCZs89prx\n/us/fmCoeQEAAGZp14DPfXKSZyb5SJK/TvKkLR7/50nOWbftiu2PBQAA0J8h4+yvWmvHJklVPS9b\nj7PPttb2T38sAACA/gz2ssbW2sGhnhsAAGDR9HxDkO8fX6t2Z1Xtd70ZAACwyHqNs4uS/EySJyd5\ndpIvJvmzqnrOTKcCAAAYSLXWhv9DRtecXZDkxNbaNYdx/JFJ9id5RGvtkRvssy/JviQ5+uijH3vc\ncccd/sAspIMHD+aII3r9+whmydpgEuuCjSz72rj7YHLDbQdTSR7xkCPygOX9n+Jeln1dMNmVV175\n2dba7s3uP+QNQaamtfalqnprknOr6mtbazdM2Of8JOcnyZ49e9oVV7ixI/e2srKSvXv3znoMOmRt\nMIl1wUaWeW1cceOt+aEL9mf3kZW37PvOnPg1D5n1SN1Y5nXBxqrq2q3sP495P/ypPgAA7mU1zB4g\nzGAwcxFnVbUrybOS/ENr7cZZzwMAsEyEGeyMQV/WWFVnjb987PjzU6vqpiQ3tdYuGe9zT5I3ttZ+\nfPz9DyU5M8nFSa5LcmySn0ry7Ul+aMh5AQC4N2EGO2foa87euu77140/X5Jk7/jrI8cfq65O8vAk\nv5HkmCS3Jflwkqe01v58sEkBALgXYQY7a9A4a63VVvdpre1PcvpgQwEAcL+EGey8ubjmDACAnSPM\nYDbEGQAA/0SYweyIMwAAkggzmDVxBgCAMIMOiDMAgCUnzKAP4gwAYIkJM+iHOAMAWFLCDPoizgAA\nlpAwg/6IMwCAJSPMoE/iDABgiQgz6Jc4AwBYEsIM+ibOAACWgDCD/okzAIAFJ8xgPogzAIAFJsxg\nfogzAIAFJcxgvogzAIAFJMxg/ogzAIAFI8xgPokzAIAFIsxgfokzAIAFIcxgvokzAIAFIMxg/okz\nAIA5J8xgMYgzAIA5JsxgcYgzAIA5JcxgsYgzAIA5JMxg8YgzAIA5I8xgMYkzAIA5IsxgcYkzAIA5\nIcxgsYkzAIA5IMxg8YkzAIDOCTNYDuIMAKBjwgyWhzgDAOiUMIPlIs4AADokzGD5iDMAgM4IM1hO\n4gwAoCPCDJaXOAMA6IQwg+UmzgAAOiDMAHEGADBjwgxIxBkAwEwJM2CVOAMAmBFhBqwlzgAAZkCY\nAeuJMwCAHSbMgEnEGQDADhJmwEbEGQDADhFmwKGIMwCAHSDMgPsjzgAABibMgM0QZwAAAxJmwGaJ\nMwCAgQgzYCvEGQDAAIQZsFXiDABgyoQZcDjEGQDAFAkz4HCJMwCAKRFmwHaIMwCAKRBmwHaJMwCA\nbRJmwDSIMwCAbRBmwLQMFmdV9fVV9dqq+mBV3V5VrapO2OSxR1TVy6rqmqr6YlX9fVU9fahZAQAO\nhzADpmnIM2cnJ3lmkluS/PUWj/3VJOck+S9Jnppkf5K3VtW/nuaAAACHS5gB07ZrwOf+q9basUlS\nVc9L8qTNHFRVD0/ykiSvbq395njz+6vq5CSv
TnLxEMMCAGyWMAOGMNiZs9bawcM89MlJHpjkzeu2\nvznJN1fVidsaDABgG66/9aAwAwbR4w1BTk1yZ5Kr1m2/fPz5lJ0dBwBg5Iobb825l94hzIBB9Bhn\nxyT5XGutrdt+85rHAQB21OpLGY88QpgBwxjymrMdVVX7kuxLkt27d2dlZWW2A9GdAwcOWBdMZG0w\niXXBWtffejDnXnpHjjyi8rPfdDDXXvahXDvroeiKnxlMQ49xdkuSr6qqWnf2bPWM2c0Tjklr7fwk\n5yfJnj172t69ewcdkvmzsrIS64JJrA0msS5YdcWNt+bFF+zPgx90VN6y7ztz7WUfsja4Dz8zmIYe\nX9Z4eZKjkjxq3fbVa80+sbPjAADLyl0ZgZ3UY5y9J8ndSZ69bvtzklzWWrt650cCAJaNMAN22qAv\na6yqs8ZfPnb8+alVdVOSm1prl4z3uSfJG1trP54krbXPVNV5SV5WVbcm+WiSZyU5PckZQ84LAJAI\nM2A2hr7m7K3rvn/d+PMlSfaOvz5y/LHWy5McSPLCJI9IckWSZ7bW3jXMmAAAI8IMmJVB46y1Voez\nT2vtS0leOf4AANgRwgyYpR6vOQMA2HHCDJg1cQYALD1hBvRAnAEAS02YAb0QZwDA0hJmQE/EGQCw\nlIQZ0BtxBgAsHWEG9EicAQBLRZgBvRJnAMDSEGZAz8QZALAUhBnQO3EGACw8YQbMA3EGACw0YQbM\nC3EGACwsYQbME3EGACwkYQbMG3EGACwcYQbMI3EGACwUYQbMK3EGACwMYQbMM3EGACwEYQbMO3EG\nAMw9YQYsAnEGAMw1YQYsCnEGAMwtYQYsEnEGAMwlYQYsGnEGAMwdYQYsInEGAMwVYQYsKnEGAMwN\nYQYsMnEGAMwFYQYsOnEGAHRPmAHLQJwBAF0TZsCyEGcAQLeEGbBMxBkA0CVhBiwbcQYAdEeYActI\nnAEAXRFmwLISZwBAN4QZsMzEGQDQBWEGLDtxBgDMnDADEGcAwIwJM4ARcQYAzIwwA/gycQYAzIQw\nA7g3cQYA7DhhBnBf4gwA2FHCDGAycQYA7BhhBrAxcQYA7AhhBnBo4gwAGJwwA7h/4gwAGJQwA9gc\ncQYADEaYAWyeOAMABiHMALZGnAEAUyfMALZOnAEAUyXMAA6POAMApkaYARw+cQYATIUwA9gecQYA\nbJswA9g+cQYAbIswA5gOcQYAHDZhBjA94gwAOCzCDGC6xBkAsGXCDGD6xBkAsCXCDGAYg8ZZVT2y\nqt5WVZ+vqi9U1Z9W1XGbPLZt8PFtQ84MAGxMmAEMZ9dQT1xVD07yviR3Jjk7SUvyyiTvr6pvaa3d\ntomneUOS16/bduU05wQANkeYAQxrsDhL8vwkJyXZ01q7Kkmq6uNJPpXkJ5Kct4nn+L+ttf3DjQgA\nbIYwAxjekC9rPCPJ/tUwS5LW2tVJ/ibJmQP+uQDAFAkzgJ0xZJydmuSyCdsvT3LKJp/jJ6vqzqq6\nvareV1X/anrjAQD3R5gB7JxqrQ3zxFV3JTmvtfbSddtfmeSlrbVDvqSyqt6U5F1J/l+S45P8h4yi\n7omttZUJ++9Lsi9Jdu/e/dgLL7xwGv8YLJADBw7koQ996KzHoEPWBpNYF8n1tx7MuZfekSOPqLz0\nXxydRzzETZ4Ta4PJrAsmefzjH/+R1tppm92/2zib8HxfkdGZuOtaa999qH337NnTrrjiiq2OzIJb\nWVnJ3r17Zz0GHbI2mGTZ14UzZhtb9rXBZNYFk1TVluJsyL8CuyXJwyZsP2b82Ja01m5N8u4k37HN\nuQCAQxBmALMxZJxdntF1Z+udkuQT23jeYU71AQDCDGCGhoyzdyZ5XFWdtLqhqk5I8l3jx7akqv5Z\nkqcluXRK8wEAawgzgNkaMs4uSHJNkndU1ZlVdUaSdyS5LmveWLqqjq+qe6rqF9dse0lVXVBVP1xV\ne6vq7Ixu
wf+IJC8fcGYAWErCDGD2BnsT6tbabVV1epLfTvKmJJXkL5O8qLV2YM2uleTI3DsUr0jy\ng+OPr0zyhYzi7Mdba86
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f87f13c0d10>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot(lambda x: max(0, x))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Softplus"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"$$ g(x) = \\log(1 + e^{x}) $$"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Wygładzona wersja ReLU."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"#### Softplus – wykres"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAG9CAYAAACRcQ4FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xl8XWWB//HPkz1N0jZt032nbbrR\nQkEWQakFBAVRQXAUHVfQ+SmKjuP+G0H56TAqowPqgCsuqIgLiw4oS9kXgbZAaUv3vU2XpNn35/dH\nUig1hbTN7TlJPu/X677uveeec/uF10OaL885zwkxRiRJkiRJycpKOoAkSZIkyXImSZIkSalgOZMk\nSZKkFLCcSZIkSVIKWM4kSZIkKQUsZ5IkSZKUAhkrZyGEsSGEa0MIj4YQ6kMIMYQwsZvHruvcf//H\n2zKVV5IkSZKSlJPB754CXAQ8BTwIvPEgj78LuGK/bSsOP5YkSZIkpU8my9kDMcYRACGED3Pw5Wxn\njPGxno8lSZIkSemTsdMaY4ztmfpuSZIkSepr0rwgyFs6r1VrCiE85vVmkiRJkvqytJaz24HLgLOA\ni4FG4I8hhPckmkqSJEmSMiTEGDP/h3Rcc/ZDYFKMcd0hHJ8NPAaMjDGOO8A+lwKXAhQUFBw3fvz4\nQw+sPqm9vZ2srLT+/wglybGhrjgudCBJjY09TZHKpkhuFgwfkEWuwzNV/Jmhrrzwwgs7Y4xl3d0/\nkwuC9JgYY1sI4XfA1SGEUTHGrV3scwNwA0B5eXlcscKFHfVyCxcuZP78+UnHUAo5NtQVx4UO5EiP\njT31LXz65sXcs7yCS+aM4j8umENxfq/4Fa5f8WeGuhJCWH8w+/fG/7IzP9UnSZKUAs9squL//Opp\ntlc3cuV5s/jnkycQQkg6lqQM6RXlLISQA7wT2BBj3JZ0HkmSpEyKMXLTExu48rbnGVacx80fOZlj\nx5cmHUtShmW0nIUQ3tH58rjO5zeFEHYAO2KM93fu0wrcGGP8UOf7dwFvBf4CbARGAB8D5gHvymRe\nSZKkpNU3t/KlPz7HHxdt5rRpZXznncdQWpSXdCxJR0CmZ85+t9/773c+3w/M73yd3fnYay0wHPgm\nMASoA54Ezo4x3pWxpJIkSQlbVVHLv/zyKVbtqOVfz5zGx94whawsT2OU+ouMlrMY46v+NNl/nxjj\nY8CCjIWSJElKoduWbOHzv3+GwtxsfvHBEzl16rCkI0k6wnrFNWeSJEl9VVNrG1//8zJufHQ9x08o\n5bp3z2PkoIKkY0lKgOVMkiQpIZsq6/nYTYtYsrGKS143ic+ePZ3cbO+VJfVXljNJkqQE3Leigk/9\ndjFtbZH/ec88zp49KulIkhJmOZMkSTqC2toj37n7Ba69dxUzRg3kBxfPY+KwoqRjSUoBy5kkSdIR\nUlHTyOW/Wcwjq3fxzuPHceVbZ1GQm/3qB0rqFyxnkiRJR8BDK3dy+W8XUdvUyjffMYcLjx+XdCRJ\nKWM5kyRJyqDWtna+e89KrrtvFVPKivn1JScxdURJ0rEkpZDlTJIkKUO2Vzdy2a8X8cTa3Vx0/Fiu\nPG82hXmexiipa5YzSZKkDLj/hR186reLaWhu45qL5nL+vLFJR5KUcpYzSZKkHtTa1s63//YCP1i4\nmvIRJXzv4nlMGV6cdCxJvYDlTJIkqYdsqWrgE79exJPrK3nXCeP4yltcjVFS91nOJEmSesC9y7fz\n6ZuX0NLaznf/6RjeesyYpCNJ6mUsZ5IkSYehpa2db961ghseWMOMUQP53ruPZXKZpzFKOniWM0mS\npEO0qbKey369iEUbqrj4xPH833NnehqjpENmOZMkSToEdz63lc/9/lna2iPXvftYzp0zOulIkno5\ny5kkSdJBaG6LfOmPz/Krxzdw9JhBXPuuY5k4
rCjpWJL6AMuZJElSN63YVsOVjzawuXYDl75+Mp95\nYzl5OVlJx5LUR1jOJEmSXkWMkV8+voGr7nie/KzIjR88gdOmlSUdS1IfYzmTJEl6BVX1zXzu989w\n19LtvH5aGReMqbOYScoI5+ElSZIO4PE1u3jTdx/k3uUVfOnNM/jZ+1/DoPyQdCxJfZQzZ5IkSftp\nbWvn2ntXce29Kxk/ZAC//5fXMmfs4KRjSerjLGeSJEn72FzVwKd+s5gn1u3m/GPH8NW3zaY431+Z\nJGWeP2kkSZI67b13WWtbO//1zrm8/dixSUeS1I9YziRJUr/X2NLG1+54nl89voE5Ywfx3//kvcsk\nHXmWM0mS1K8t3bKHT/5mMasqavnI6yfzr967TFJCLGeSJKlfam+P/PDBNXzrrysoHZDHLz50Aq+b\n6hL5kpJjOZMkSf3OlqoG/vXmJTy6ZhdnzxrJN84/mtKivKRjSernLGeSJKlfueOZLXzxD8/S2h75\nzwvmcOHxYwnBe5dJSp7lTJIk9Qs1jS185dal/GHRZo4ZN5jvvPMYF/2QlCqWM0mS1Oc9uW43n7p5\nMZsrG/jE6VO5bMEUcrNd9ENSuljOJElSn9XS1s6196zkuvtWMaa0kN999GSOmzAk6ViS1CXLmSRJ\n6pPW7azjk79dzJKNVbzjuLF85S0zKSnITTqWJB2Q5UySJPUpMUZ++/eNfPWO58nNzuL7F8/jzUeP\nSjqWJL0qy5kkSeozdtU28cU/PstdS7fz2qOG8u2L5jJqUGHSsSSpWyxnkiSpT/jr0m184Q/PUtPY\nypfePIMPnTqJrCyXyJfUe1jOJElSr1bd2MJXb3+eW57axKzRA7npkmMoH1mSdCxJOmiWM0mS1Gs9\nsmon/3bLM2yrbuSyBVO4bMFU8nJcIl9S72Q5kyRJvU5DcxtX37mcnz2yjsnDirjloydz7PjSpGNJ\n0mGxnEmSpF5l8cYqPn3zYtbsqOP9r53I586eTmFedtKxJOmwWc4kSVKv0NzaznX3ruR7C1czoiSf\nX334RE6ZMizpWJLUYyxnkiQp9V7YXsOnfruYpVuquWDeWL5y3kwGekNpSX2M5UySJKVWW3vkxw+t\n4Vt/fYGS/Bz+5z3HcfbskUnHkqSMsJxJkqRU2rCrns/8bglPrNvNmTNH8I3zj2ZYcX7SsSQpYyxn\nkiQpVdrbIz9/dB1X37mCnKzAty6cywXzxhCCN5SW1LdZziRJUmqs21nHZ3//DE+s3c1p08r4xvlH\nM3pwYdKxJOmIsJxJkqTEtbdHfvbIOv7zruXkZmXxn++Yw4XHjXW2TFK/YjmTJEmJWruzjs/esoS/\nr6vkDeVlfP38oxk1yNkySf2P5UySJCWirXO27Jt3LSc3O8tryyT1e5YzSZJ0xK3ZUctnb3mGJ9dX\nsmD6cL7+9qMZOagg6ViSlCjLmSRJOmLa2iM/fXgt37xrBfk5WXz7wrmc72yZJAGWM0mSdISs7pwt\ne2p9JadPH87Xzz+aEQOdLZOkvSxnkiQpo1ra2rn+/tX89z2rKMjN4pqL5vL2Y50tk6T9Wc4kSVLG\nPLOpis/e8gzLt9Xw5qNHcsV5sxhe4myZJHXFciZJknpcQ3Mb1/xtBT9+aC3DivO5/r3HcdaskUnH\nkqRUy8rUF4cQxoYQrg0hPBpCqA8hxBDCxG4emxVC+EIIYV0IoTGEsCSEcEGmskqSpJ7zyKqdnPWd\nB/jhg2t552vG8bdPn2Yxk6RuyFg5A6YAFwGVwIMHeezXgCuA64A3AY8BvwshvLknA0qSpJ6zp76F\nz93yDO/+0eNkBbjpkhP5xvlzGFSYm3Q0SeoVMnla4wMxxhEAIYQPA2/szkEhhOHAZ4D/iDF+q3Pz\nfSGEKcB/AH/JRFhJknTo7nxuK//31qXsrmvmI6dN5lNnTKMgNzvpWJLUq2SsnMUY2w/x0LOAPOCX\n+23/JfCT
EMKkGOPawwonSZJ6REV1I/9+61LuXLqNmaMG8tP3v4bZYwYlHUuSeqU0LggyC2gCVu23\nfWnn80zAciZJUoLa2yM
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f87f1348e50>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plot(lambda x: math.log(1 + math.exp(x)))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Problem zanikającego gradientu (_vanishing gradient problem_)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Sigmoidalne funkcje aktywacji ograniczają wartości na wyjściach neuronów do niewielkich przedziałów ($(-1, 1)$, $(0, 1)$ itp.).\n",
|
|||
|
"* Jeżeli sieć ma wiele warstw, to podczas propagacji wstecznej mnożymy przez siebie wiele małych wartości → obliczony gradient jest mały.\n",
|
|||
|
"* Im więcej warstw, tym silniejszy efekt zanikania."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"#### Sposoby na zanikający gradient\n",
|
|||
|
"\n",
|
|||
|
"* Modyfikacja algorytmu optymalizacji (_RProp_, _RMSProp_)\n",
|
|||
|
"* Użycie innej funkcji aktywacji (ReLU, softplus)\n",
|
|||
|
"* Dodanie warstw _dropout_\n",
|
|||
|
"* Nowe architektury (LSTM itp.)\n",
|
|||
|
"* Więcej danych, zwiększenie mocy obliczeniowej"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"## 5.7. Wielowarstwowe sieci neuronowe w praktyce"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"### Przykład: MNIST\n",
|
|||
|
"\n",
|
|||
|
"_Modified National Institute of Standards and Technology database_"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"* Zbiór cyfr zapisanych pismem odręcznym\n",
|
|||
|
"* 60 000 przykładów uczących, 10 000 przykładów testowych\n",
|
|||
|
"* Rozdzielczość każdego przykładu: 28 × 28 = 784 piksele"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 31,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# źródło: https://github.com/keras-team/keras/blob/master/examples/mnist_mlp.py\n",
|
|||
|
"\n",
|
|||
|
"import keras\n",
|
|||
|
"from keras.datasets import mnist\n",
|
|||
|
"\n",
|
|||
|
"# załaduj dane i podziel je na zbiory uczący i testowy\n",
|
|||
|
"(x_train, y_train), (x_test, y_test) = mnist.load_data()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def draw_examples(examples, captions=None):\n",
|
|||
|
" plt.figure(figsize=(16, 4))\n",
|
|||
|
" m = len(examples)\n",
|
|||
|
" for i, example in enumerate(examples):\n",
|
|||
|
" plt.subplot(100 + m * 10 + i + 1)\n",
|
|||
|
" plt.imshow(example, cmap=plt.get_cmap('gray'))\n",
|
|||
|
" plt.show()\n",
|
|||
|
" if captions is not None:\n",
|
|||
|
" print(6 * ' ' + (10 * ' ').join(str(captions[i]) for i in range(m)))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA6oAAACVCAYAAABRuAf7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XmUVNXd7vFno4ggICLEGA2Coigy\nKeAUl6DiFBFBFEMYlEThakTNG7kkhhgcEMVhvYDixBUjsgJGZDIaRUV4nVig0RtQDBoEUSIoMxi5\n6r5/dFd3/bbd1VXdp6pOVX0/a/XqeurUsLt4qNOnq3Zt570XAAAAAABxUS/fAwAAAAAAIBkHqgAA\nAACAWOFAFQAAAAAQKxyoAgAAAABihQNVAAAAAECscKAKAAAAAIiVSA9UnXM/ds495Zzb5pzb7px7\n2jnXKsr7QGGgC0igC5DoASrRBSTQBUj0ANVzUa2j6pxrJOldSV9LGiPJS7pNUiNJnbz3uyK5I8Qe\nXUACXYBED1CJLiCBLkCiB0ht7whv60pJh0tq573/UJKcc/9X0mpJIyTdG+F9Id7oAhLoAiR6gEp0\nAQl0ARI9QApRvqL6kqR9vfc/Cc5fLEne+x6R3BFijy4ggS5AogeoRBeQQBcg0QOkFuUrqsdKmlfF\n+SslXZLODTjnojlqRs54710VZ9epC/SgIH3hvW9Zxfl0ofRU1QX2DyWI/QPKsX9AQuT7B3pQkKp7\nTvieKD9MqbmkLVWcv1nSARHeD+KPLpSetdWcTxdKT1VdoAdIoAulh/0DEtg/QKr+OeF7onxFtVac\nc8MlDc/3OJBf9AAJdAEJdAESPUAlugCJHpSSKA9Ut6jqv3xU95cSSZL3/mFJD0u8fF9EMu4CPSha\ndAES+wdU4jkBCXQBEj1AClG+9Xelyt5nHmov6b0I7wfxRxeQQBcg0QNUogtIoAuQ6AFSiPJAdb6k\nk5xzhyfOcM61lvST8m0oHXQBCXQBEj1AJbqABLoAiR4ghSiXp9lPZQv2fqXKBXtvldREZQv27kzj\nNnj5vsBU9amOde0CPShIb3nvu4Vn0oWS9L0usH8oTewfUI79AxIi3z/Qg4JU5XNCVSJ7RdV7v0vS\nGZL+KWm6pBmS1kg6I51fQlA86AIS6AIkeoBKdAEJdAESPUBqkb2iGgX+KlJ4qlknr07oQUFK+69j\nmaALBYkuQBL7B1TgOQEJkXeBHhSk3L+iCgAAAABAFDhQBQAAAADECgeqAAAAAIBY4UAVAAAAABAr\nHKgCAAAAAGKFA1UAAAAAQKxwoAoAAAAAiJW98z0AoFR17drV5GuuucbkoUOHmvz444+bPHnyZJPf\nfvvtCEcHAACAbJk4caLJ1157bcXpFStWmG29e/c2ee3atdkbWIzwiioAAAAAIFY4UAUAAAAAxApv\n/Y3IXnvtZfL++++f9nXDt3w2atTI5Hbt2pn8q1/9yuS7777b5IEDB5r8n//8x+Q77rij4vTNN9+c\n9jhRN126dDF54cKFJjdt2tRk773JQ4YMMblPnz4mH3jggXUdIorEmWeeafKMGTNM7tGjh8kffPBB\n1seE7BgzZozJ4XN6vXr279E9e/Y0efHixVkZF4BoNGnSxOTGjRubfP7555vcsmVLk++9916Tv/76\n6whHh0y0bt3a5MGDB5v83XffVZw+5phjzLajjz7aZN76CwAAAABAHnCgCgAAAACIFQ5UAQAAAACx\nwhzVcq1atTJ5n332MfmUU04x+dRTTzW5WbNmJvfv3z+ysa1fv97kSZMmmdyvXz+Td+zYYfK7775r\nMnOScueEE06oOD179myzLZzHHM5JDf8d9+zZY3I4J/Wkk04yOVyuJrx+KTjttNMqToeP15w5c3I9\nnJzp3r27ycuWLcvTSBC1yy+/3OTRo0ebnDzH
qSrh8wyA/Eueuxj+nz755JNN7tChQ0a3ffDBB5uc\nvAQKcmvTpk0mL1myxOTws0fAK6oAAAAAgJjhQBUAAAAAECscqAIAAAAAYqVk56iGa1q+/PLLJmey\nDmrUwjlG4Tp5O3fuNDlcI3HDhg0mb9myxWTWTIxOuObt8ccfb/ITTzxRcTqcJ1KT1atXmzxhwgST\nZ86cafJrr71mctib8ePHZ3T/xSB5zcgjjzzSbCumOarhWplt2rQx+bDDDjPZOZf1MSE7wn/Lfffd\nN08jQaZOPPFEk5PXUAzXNj722GNT3tYNN9xg8meffWZy+DkayfsiSVq6dGnqwSJS4RqY119/vcmD\nBg2qON2wYUOzLXy+/uSTT0wOP88iXH9zwIABJk+ZMsXkVatWVTdsRGzXrl0ml8paqHXBK6oAAAAA\ngFjhQBUAAAAAECscqAIAAAAAYqVk56iuW7fO5C+//NLkKOeohnNBtm7davLpp59ucrje5fTp0yMb\nC6L10EMPmTxw4MDIbjuc79q4cWOTw/Vwk+djSlKnTp0iG0uhGjp0aMXpN954I48jya5w/vOVV15p\ncjg/jTlJhaNXr14mjxw5MuXlw3/b3r17m/z5559HMzDU6NJLLzV54sSJJrdo0aLidDgP8ZVXXjG5\nZcuWJt91110p7zu8vfD6P/vZz1JeH5kJf2e88847TQ670KRJk7RvO/y8inPOOcfk+vXrmxw+ByT3\nrKqM3GnWrJnJnTt3ztNICgevqAIAAAAAYoUDVQAAAABArHCgCgAAAACIlbTmqDrnDpU0WlI3SZ0l\nNZTUxnv/cXC5fSXdKmmwpGaS3pE02nu/JMIxR2Lz5s0mjxo1yuRwXs/f//53kydNmpTy9t95552K\n02eddZbZFq6jFK6Xdt1116W87ThxznkVeBcy0bVrV5PPP/98k1OtTxnOKV2wYIHJd999t8nhunhh\nB8P1cc8444y0x5IFXZ1zrePWg3B90WI1derUlNvDOU5ZFssuFIpw/ctp06aZXNPnJ4RzF/O5Tl+x\n7x/23tv+CtWtWzeTH3nkEZPDdbeXLKn8cW+99Vaz7dVXXzW5QYMGJj/55JMmn3322SnHunz58pTb\ns6zonxP69etn8hVXXFHr2/roo49MDn+HDNdRbdu2ba3vKw+KvguphM8BrVq1Svu63bt3Nzmci1ys\na7Km+1tcW0kDJG2R9D8pLvd/JF0p6SZJvSVtkPS8c65LXQaJgkQXINEDVKILSKALkOgBKtEFVCnd\nT/1d4r0/SJKcc1dI+t6f7pxznSX9XNIvvPfTys9bLGmlpFsk9YlkxIg9ugCJHqASXUACXYBED1CJ\nLiCVtF5R9d5/l8bF+kj6f5JmJV3vG0kzJZ3jnGtQ3RVRdOgCJHqASnQBCXQBEj1AJbqAakW5juqx\nktZ473cH56+UtI/K3j68MsL7i9TcuXNNfvnll03esWOHyeHaR7/85S9NTp5vGM5JDa1caR+W4cOH\npx5s/BV0F5J16WLfdbJw4UKTmzZtarL33uTnnnuu4nS4xmqPHj1MHjNmjMnhvMNNmzaZ/O6775r8\n3Xf270nh/NlwXda3335bWZbzHoRrxx500EFR3nxs1TRvMextHhTNc0K2XXbZZSb/6Ec/Snn5cL3N\nxx9/POohRa1oujB48GCTa5orHv4/TF5bc/v27SmvG67DWdOc1PXr15v8pz/9KeXl86BoeiBJl1xy\nSUaX//jjj01etmxZxenRo0ebbeGc1NAxxxyT0X3HUFF1IZXws0cee+wxk8eOHVvtdcNtW7duNfm+\n++6ry9BiK8pPGmmusjmsoc1J21Ea6AIkeoBKdAEJdAESPUAluoBqRfmKaq0454ZLKviXEFE39AAJ\ndAEJdAESPUAlugCJHpSSKF9R3SLpgCrOT/wlZHMV2+S9f9h73817362q7ShIGXeBHhQlnhOQQBeQ\nwP4BEs8J
qMRzAqoV5SuqKyX1c841Ct5n3l7SHkkfRnhfWVfTfJFt27al3H7llVdWnJ41a5bZFs4l\nLEIF24WjjjrK5HB93XA
|
|||
|
"text/plain": [
|
|||
|
"<matplotlib.figure.Figure at 0x7f87f132bed0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" 5 0 4 1 9 2 1\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"draw_examples(x_train[:7], captions=y_train)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"60000 przykładów uczących\n",
|
|||
|
"10000 przykładów testowych\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"num_classes = 10\n",
|
|||
|
"\n",
|
|||
|
"x_train = x_train.reshape(60000, 784) # 784 = 28 * 28\n",
|
|||
|
"x_test = x_test.reshape(10000, 784)\n",
|
|||
|
"x_train = x_train.astype('float32')\n",
|
|||
|
"x_test = x_test.astype('float32')\n",
|
|||
|
"x_train /= 255\n",
|
|||
|
"x_test /= 255\n",
|
|||
|
"print('{} przykładów uczących'.format(x_train.shape[0]))\n",
|
|||
|
"print('{} przykładów testowych'.format(x_test.shape[0]))\n",
|
|||
|
"\n",
|
|||
|
"# przekonwertuj wektory klas na binarne macierze klas\n",
|
|||
|
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
|
|||
|
"y_test = keras.utils.to_categorical(y_test, num_classes)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"Layer (type) Output Shape Param # \n",
|
|||
|
"=================================================================\n",
|
|||
|
"dense_4 (Dense) (None, 512) 401920 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dropout_1 (Dropout) (None, 512) 0 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_5 (Dense) (None, 512) 262656 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dropout_2 (Dropout) (None, 512) 0 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_6 (Dense) (None, 10) 5130 \n",
|
|||
|
"=================================================================\n",
|
|||
|
"Total params: 669,706\n",
|
|||
|
"Trainable params: 669,706\n",
|
|||
|
"Non-trainable params: 0\n",
|
|||
|
"_________________________________________________________________\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"model = Sequential()\n",
|
|||
|
"model.add(Dense(512, activation='relu', input_shape=(784,)))\n",
|
|||
|
"model.add(Dropout(0.2))\n",
|
|||
|
"model.add(Dense(512, activation='relu'))\n",
|
|||
|
"model.add(Dropout(0.2))\n",
|
|||
|
"model.add(Dense(num_classes, activation='softmax'))\n",
|
|||
|
"model.summary()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"((60000, 784), (60000, 10))\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(x_train.shape, y_train.shape)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Train on 60000 samples, validate on 10000 samples\n",
|
|||
|
"Epoch 1/5\n",
|
|||
|
"60000/60000 [==============================] - 14s - loss: 0.2457 - acc: 0.9244 - val_loss: 0.1250 - val_acc: 0.9605\n",
|
|||
|
"Epoch 2/5\n",
|
|||
|
"60000/60000 [==============================] - 14s - loss: 0.1021 - acc: 0.9691 - val_loss: 0.0859 - val_acc: 0.9748\n",
|
|||
|
"Epoch 3/5\n",
|
|||
|
"60000/60000 [==============================] - 14s - loss: 0.0746 - acc: 0.9773 - val_loss: 0.0884 - val_acc: 0.9744\n",
|
|||
|
"Epoch 4/5\n",
|
|||
|
"60000/60000 [==============================] - 13s - loss: 0.0619 - acc: 0.9815 - val_loss: 0.0893 - val_acc: 0.9754\n",
|
|||
|
"Epoch 5/5\n",
|
|||
|
"60000/60000 [==============================] - 15s - loss: 0.0507 - acc: 0.9849 - val_loss: 0.0845 - val_acc: 0.9771\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<keras.callbacks.History at 0x7f87e996a2d0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])\n",
|
|||
|
"\n",
|
|||
|
"model.fit(x_train, y_train, batch_size=128, epochs=5, verbose=1,\n",
|
|||
|
" validation_data=(x_test, y_test))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 38,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Test loss: 0.0845102945257\n",
|
|||
|
"Test accuracy: 0.9771\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"score = model.evaluate(x_test, y_test, verbose=0)\n",
|
|||
|
"\n",
|
|||
|
"print('Test loss: {}'.format(score[0]))\n",
|
|||
|
"print('Test accuracy: {}'.format(score[1]))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Warstwa _dropout_ to metoda regularyzacji, służy zapobieganiu nadmiernemu dopasowaniu sieci. Polega na tym, że część węzłów sieci jest usuwana w sposób losowy."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"Layer (type) Output Shape Param # \n",
|
|||
|
"=================================================================\n",
|
|||
|
"dense_7 (Dense) (None, 512) 401920 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_8 (Dense) (None, 512) 262656 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_9 (Dense) (None, 10) 5130 \n",
|
|||
|
"=================================================================\n",
|
|||
|
"Total params: 669,706\n",
|
|||
|
"Trainable params: 669,706\n",
|
|||
|
"Non-trainable params: 0\n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"Train on 60000 samples, validate on 10000 samples\n",
|
|||
|
"Epoch 1/5\n",
|
|||
|
"60000/60000 [==============================] - 11s - loss: 0.2214 - acc: 0.9314 - val_loss: 0.1048 - val_acc: 0.9668\n",
|
|||
|
"Epoch 2/5\n",
|
|||
|
"60000/60000 [==============================] - 12s - loss: 0.0838 - acc: 0.9739 - val_loss: 0.0842 - val_acc: 0.9752\n",
|
|||
|
"Epoch 3/5\n",
|
|||
|
"60000/60000 [==============================] - 10s - loss: 0.0548 - acc: 0.9829 - val_loss: 0.0806 - val_acc: 0.9773\n",
|
|||
|
"Epoch 4/5\n",
|
|||
|
"60000/60000 [==============================] - 9s - loss: 0.0387 - acc: 0.9878 - val_loss: 0.0713 - val_acc: 0.9804\n",
|
|||
|
"Epoch 5/5\n",
|
|||
|
"60000/60000 [==============================] - 9s - loss: 0.0297 - acc: 0.9911 - val_loss: 0.0847 - val_acc: 0.9787\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<keras.callbacks.History at 0x7f87e82d1350>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Bez warstw Dropout\n",
|
|||
|
"\n",
|
|||
|
"num_classes = 10\n",
|
|||
|
"\n",
|
|||
|
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
|
|||
|
"\n",
|
|||
|
"x_train = x_train.reshape(60000, 784) # 784 = 28 * 28\n",
|
|||
|
"x_test = x_test.reshape(10000, 784)\n",
|
|||
|
"x_train = x_train.astype('float32')\n",
|
|||
|
"x_test = x_test.astype('float32')\n",
|
|||
|
"x_train /= 255\n",
|
|||
|
"x_test /= 255\n",
|
|||
|
"\n",
|
|||
|
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
|
|||
|
"y_test = keras.utils.to_categorical(y_test, num_classes)\n",
|
|||
|
"\n",
|
|||
|
"model_no_dropout = Sequential()\n",
|
|||
|
"model_no_dropout.add(Dense(512, activation='relu', input_shape=(784,)))\n",
|
|||
|
"model_no_dropout.add(Dense(512, activation='relu'))\n",
|
|||
|
"model_no_dropout.add(Dense(num_classes, activation='softmax'))\n",
|
|||
|
"model_no_dropout.summary()\n",
|
|||
|
"\n",
|
|||
|
"model_no_dropout.compile(loss='categorical_crossentropy',\n",
|
|||
|
" optimizer=RMSprop(),\n",
|
|||
|
" metrics=['accuracy'])\n",
|
|||
|
"\n",
|
|||
|
"model_no_dropout.fit(x_train, y_train,\n",
|
|||
|
" batch_size=128,\n",
|
|||
|
" epochs=5,\n",
|
|||
|
" verbose=1,\n",
|
|||
|
" validation_data=(x_test, y_test))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Test loss (no dropout): 0.0846566448619\n",
|
|||
|
"Test accuracy (no dropout): 0.9787\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Bez warstw Dropout\n",
|
|||
|
"\n",
|
|||
|
"score = model_no_dropout.evaluate(x_test, y_test, verbose=0)\n",
|
|||
|
"\n",
|
|||
|
"print('Test loss (no dropout): {}'.format(score[0]))\n",
|
|||
|
"print('Test accuracy (no dropout): {}'.format(score[1]))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"Layer (type) Output Shape Param # \n",
|
|||
|
"=================================================================\n",
|
|||
|
"dense_10 (Dense) (None, 2500) 1962500 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_11 (Dense) (None, 2000) 5002000 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_12 (Dense) (None, 1500) 3001500 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_13 (Dense) (None, 1000) 1501000 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_14 (Dense) (None, 500) 500500 \n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"dense_15 (Dense) (None, 10) 5010 \n",
|
|||
|
"=================================================================\n",
|
|||
|
"Total params: 11,972,510\n",
|
|||
|
"Trainable params: 11,972,510\n",
|
|||
|
"Non-trainable params: 0\n",
|
|||
|
"_________________________________________________________________\n",
|
|||
|
"Train on 60000 samples, validate on 10000 samples\n",
|
|||
|
"Epoch 1/10\n",
|
|||
|
"60000/60000 [==============================] - 212s - loss: 0.7388 - acc: 0.7954 - val_loss: 0.2908 - val_acc: 0.9172\n",
|
|||
|
"Epoch 2/10\n",
|
|||
|
"60000/60000 [==============================] - 191s - loss: 0.2390 - acc: 0.9305 - val_loss: 0.1833 - val_acc: 0.9470\n",
|
|||
|
"Epoch 3/10\n",
|
|||
|
"60000/60000 [==============================] - 166s - loss: 0.1688 - acc: 0.9517 - val_loss: 0.1555 - val_acc: 0.9549\n",
|
|||
|
"Epoch 4/10\n",
|
|||
|
"60000/60000 [==============================] - 166s - loss: 0.1344 - acc: 0.9614 - val_loss: 0.1274 - val_acc: 0.9621\n",
|
|||
|
"Epoch 5/10\n",
|
|||
|
"60000/60000 [==============================] - 166s - loss: 0.1074 - acc: 0.9683 - val_loss: 0.1213 - val_acc: 0.9661\n",
|
|||
|
"Epoch 6/10\n",
|
|||
|
"60000/60000 [==============================] - 440s - loss: 0.0924 - acc: 0.9725 - val_loss: 0.1066 - val_acc: 0.9709\n",
|
|||
|
"Epoch 7/10\n",
|
|||
|
"60000/60000 [==============================] - 169s - loss: 0.0768 - acc: 0.9773 - val_loss: 0.1777 - val_acc: 0.9517\n",
|
|||
|
"Epoch 8/10\n",
|
|||
|
"60000/60000 [==============================] - 183s - loss: 0.0657 - acc: 0.9805 - val_loss: 0.1053 - val_acc: 0.9711\n",
|
|||
|
"Epoch 9/10\n",
|
|||
|
"60000/60000 [==============================] - 170s - loss: 0.0572 - acc: 0.9832 - val_loss: 0.1044 - val_acc: 0.9717\n",
|
|||
|
"Epoch 10/10\n",
|
|||
|
"60000/60000 [==============================] - 166s - loss: 0.0493 - acc: 0.9851 - val_loss: 0.0938 - val_acc: 0.9752\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<keras.callbacks.History at 0x7f87f007f610>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Więcej warstw, inna funkcja aktywacji\n",
|
|||
|
"\n",
|
|||
|
"num_classes = 10\n",
|
|||
|
"\n",
|
|||
|
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
|
|||
|
"\n",
|
|||
|
"x_train = x_train.reshape(60000, 784) # 784 = 28 * 28\n",
|
|||
|
"x_test = x_test.reshape(10000, 784)\n",
|
|||
|
"x_train = x_train.astype('float32')\n",
|
|||
|
"x_test = x_test.astype('float32')\n",
|
|||
|
"x_train /= 255\n",
|
|||
|
"x_test /= 255\n",
|
|||
|
"\n",
|
|||
|
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
|
|||
|
"y_test = keras.utils.to_categorical(y_test, num_classes)\n",
|
|||
|
"\n",
|
|||
|
"model3 = Sequential()\n",
|
|||
|
"model3.add(Dense(2500, activation='tanh', input_shape=(784,)))\n",
|
|||
|
"model3.add(Dense(2000, activation='tanh'))\n",
|
|||
|
"model3.add(Dense(1500, activation='tanh'))\n",
|
|||
|
"model3.add(Dense(1000, activation='tanh'))\n",
|
|||
|
"model3.add(Dense(500, activation='tanh'))\n",
|
|||
|
"model3.add(Dense(num_classes, activation='softmax'))\n",
|
|||
|
"model3.summary()\n",
|
|||
|
"\n",
|
|||
|
"model3.compile(loss='categorical_crossentropy',\n",
|
|||
|
" optimizer=RMSprop(),\n",
|
|||
|
" metrics=['accuracy'])\n",
|
|||
|
"\n",
|
|||
|
"model3.fit(x_train, y_train,\n",
|
|||
|
" batch_size=128,\n",
|
|||
|
" epochs=10,\n",
|
|||
|
" verbose=1,\n",
|
|||
|
" validation_data=(x_test, y_test))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 42,
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Test loss: 0.0937788957049\n",
|
|||
|
"Test accuracy: 0.9752\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Więcej warstw, inna funkcja aktywacji\n",
|
|||
|
"\n",
|
|||
|
"score = model3.evaluate(x_test, y_test, verbose=0)\n",
|
|||
|
"\n",
|
|||
|
"print('Test loss: {}'.format(score[0]))\n",
|
|||
|
"print('Test accuracy: {}'.format(score[1]))"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"celltoolbar": "Slideshow",
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 2
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython2",
|
|||
|
"version": "2.7.15rc1"
|
|||
|
},
|
|||
|
"livereveal": {
|
|||
|
"start_slideshow_at": "selected",
|
|||
|
"theme": "amu"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|