zuma/w5/zumz5.ipynb

2635 lines
217 KiB
Plaintext
Raw Normal View History

2021-03-17 20:09:43 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## Uczenie maszynowe UMZ 2017/2018\n",
"# 5. Sieci neuronowe"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"\n",
"import math\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import random"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## 5.1. Perceptron"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"scrolled": true,
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEP\nERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4ICA4eFBEUHh4eHh4e\nHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/wAARCAFoAeADASIA\nAhEBAxEB/8QAHAAAAQUBAQEAAAAAAAAAAAAAAgABAwQFBgcI/8QAQBAAAQMCBAQEBAQEBQQCAwEA\nAQACEQMEBRIhMQZBUWETInGBMpGhsRRCwdEzcuHwFSM1UmIHJDTxFiVDc4KS/8QAFwEBAQEBAAAA\nAAAAAAAAAAAAAAECA//EAB8RAQEBAAMBAQEAAwAAAAAAAAABEQIhMUESA1Fhcf/aAAwDAQACEQMR\nAD8A+V+BwPxrpaCYOp9CureXaguJ3hclwU7LfuA3g/YrqLioZiYMla/nfUjncWotddkuEGdx6KKl\nUc3Y6LYr0W1CTlBPU6rKrsNN+XMPklrVS062sk6hTi4AEaKgXBoA0JPNO1rid1jEaBbSqfm0Ttsx\nqaen6qhlqU/hkI2XFdpHmIHRbgvi0uPyyVHUo1cvmaY5hFQxKq0QQDHdWqOJWzmEVKYLvX+i1RnP\nzZA12gGwUfPdBdVKlS4c5phpOgB5KE+INZJUE2cdULniRBUeQ/7k4bEyZ9lGUgcnB01MqMuA5FIe\nVNxYkZueSeVHm5z8k4Pcpi9pmu1UrHESoWMdzBU9KmNcxWrRIC7eVPRzuqNJEkmFEzIByWxw1a/j\nL6lSI8oPSeSzYOy4dwOnWwhr6wIJAI26Kapw9Qnyz8x+y37em23tKdu0A5QBI05IhHRTExzJwF7f\nNTkDpIRMw2uwEEH6LpQlDSdQCrjeOcNlWH5SVGKdRv5SupDW8mj5IatrTcPhHyTErmDUdTHmHoib\nWJZK234dSdyB9tlE/DaewMeyYjOY2TAUotyYJVwWZbrP0Sqt8JmZxEd9ExcV2UgBCMU2tOiYVKbi\nMtRuvdEGkiQUxQuAMRyTZQnynZI7wSgAU+cckgORGyIN0nMJ6SkQOsR7qgZhDB6Qj9pShSmonjbW\nU2VSmdNJQz/x+qJgA3RRVGjaSPRWS0BAaYkH9EwxULREAT6qM0SQW5dlcNITM/RJzRuNExZ0z6tm\nyo0B7ZjZUq+EUHknIdfRbRbqT1Q6piuYueG6ZHlza9x+yz6/DDmz4ebvJBXakb7+ijMtB826tjH5\nrga3D1y0GKZPuFn1cJumHzUyB7L0p7QRBbKrPotcZcwEdws4n5sedULGr+Lpscx0Eie2qbHfE/EC\ni1phgH2/qu/qWlGQ5tNoInUAKjc4XQqPLnMaSecJhY8883OU+c8l11bA6ZGjRPZqpV8BjVojf8qY\nrnvGe34SgEyPMtmpgdYCZ0/lVR2F3DdcrjH/ABKliVRqbnXkVj1Pjd6roKljXEkscBB5FYNZpbVc\n08j0Qje4K/1B3odfYrpq85yBrBOq53gj/wA2p6H7FdHXacwiVvh/xZFd5dOhhZeJ0XGqCFseGUxY\nHCDspyi2Obe1zSMx2Th4bzVjFAadYADQkqoHDnopDEza1TdyNlam4+bSFExw2KIU2vBPRVF+lTt6\ngGUwVI7D9Za4ALObTeD5dFM2tXpgDUkK7oa5piiYLpOqr5gTupbqo+4LS9sFsx/fsogyPi2VvQHx\nD1SLi7mUfl6pBzW891kwABPWFKKbolyA1nflCFniO3lTVzEzSwSHJFzOQSp27jJJUoo0xuVfTs4q\nE7KalTqVD5dhukH0W6t1KMV3uBFPbmkX8p6dvp5jBXacAWjTd+IANCfsuKp06lQg7L0jgXy03NHK\nfsru+n5dPUHnJTKQaDuUxTpcJgkqQMTtGql
pt0MrSYANEJy1TZQmLSh+UIbvCjdTM8lO5pQEIflA\nWnos7HQRYudzAMfJazws3Gmh1m6eh+yzhrh6l1cU3EtcYlGMZvKZnPI9B+yC7pNDzvuqjqYTMGpR\n4gqT5xr7K7Sx6lPmaZ9AucyhRua6dFMHYMxO0cRlcrdKvSqguYZ6rgTm3zJCpUafK6FTHoQjklmX\nB0r6uwaO+itUsbuGkFxB9ioWOxOuybKVzdLiI/maD6Aq7Rx+3Il5IPoUwjZAkpOZsqlPErWoYFQD\n1BVlteg5py1GmeS1pp8qiIRgtI0LT7ojAGYbpqq5aEBbqpy4uMnRCWyUnYgLVG9sx2U5TZU1rVYw\nEBAhWyFEW6dFEs1Ve0aQoiw9Fbe0yEDWnVGbNUTSHJA+l5DIk8lo+H6oalPbQoflk+DmHmbISNu1\n27WrT8P1QlpUp+dZbrGm5j8zBo0kfJeVYuzJidw0DQPK9oLf8qp/KfsV4zj3+sXX8/6LOM2Y2OBR\n/wB5UG+h+xXUVW6jTquZ4C81/U9D9iuvq0xA06rf8pe1ioGpvDEaKx4Y6IQpb2tZl5ZNrOB2ieUr\nDvKbaNUsIiOey657TIA5rFxbD31qudsDX9E0Ype2fKPqjzugZZ7wUFWj4Jh5gpmF0ExoUZT0q7hu\nPmVcpXlOTnYJPNZ7HCdVNT8N2k6pBbuKlAsDmRImQOaz31C53wkBXGWpcCQ7RQPa1ji0nUK3wRNo\nucdCfkpG2+X4nD3QsuCBA3QuNSoR5oUaTFtJuhcJTmu3XK0KJlAjVxlSBlNpkqUMK1R5Ia0iOikF\nOo8SSUIrU26NGpRis/8ALsrBPSotafO4DsrDKtKmCGtBncyqrbeo4y5xA9VZt7emXBuaT6rWKtUH\nvewva0gDkvQeAHZsxI6/ZcC53hsFICF3n/T7Z3v9lB1h3idkQahRt3ToSN3CmpDdQt3Cno7FaBZU\nRGiSMhBXqjZAWqZ42TZSrggLTCzsZbFo70K0jmWfjU/hHT0P2URw99/ELQPdVCQTGX6q7fD/ADlT\njSVKI35Z+FRE9BClq7D3UJUUHh/8pSyjl9VIUCtCEc2j5KF0O3aplHlPZZiVF4eUmJEpZT1KkLTo\nlCoHxntPlkehhXMOfVc8/wCY7lzKpOiVpYS0F59kE1e6uaJ8tVxA7lOzHbhplzSR0k/slfsGu/yW\ne9rXDQqUjVZxBqfEpkdPN/RWqOO2rvidlPrK5p9IAGXBRZN4Wortad/bOOlRvzCnFalUHle0xvqN\nFwYzt+Go75ohc3VH4arhKhruiW/7m/NAYXGNxS6b/wDkJ+Sm/wAfuG/Fr7BB1T3NBEqMPaJ5z1XO\nU+ImkxUaR7BWaeOWrh5pB9v3QbXit5uhMajTsQVlf4nbO2cUTLyk4nI4nqg1RlPID3URaOqqsuWx\nMpn3WiVdWnNHhVNd2n7LxXiNsY1dD/n+gXrgvPK+dRBXkXEL82NXLur/ANApjFra/wCnn+pv/l/Q\nrtq5jn1XIf8ATG2qXOKvbTBJykbdivQjhFVzocYg8wtcPGYxTBOqY06g0a1bzcLpU58R7QR1A/dO\n6pYW4zP8M+pCxybYLbe4cQG0ySVctcJuKzoecg9QrdbHMNps8vhyOjgsDEeKnUnk25BE8qn9EhLi\nlxlw+63q+NTJcABzHUrlB5SQ4QVv4jxBcXlMsdIB0nNKwqjSXZiTJM7KwO1zVIxrZl26hACdpdzk\nrfxKt0szT5dVFc03OfnKemX6kEgDklVqVHDRsrFZwNJzWndTuusoGXT2VdrQFICyPMAVWyNSrUOY\nbJNtzGYoxWgRTaD6JEVCeYHRA9NtFurjqpRVgRT+yha1gMuM9lMxzCIY0BA/nc8wMoVy08v+YdS3\nUKmxrnOlzp9lMamVnhgSDodUFku8Wqap1nT+/kvQP+n+hLeUn7Lzui7QQ2IXofAZEggyTOvsg7H8\nxHdSsUR
IzEbnqpGHsgVV7msJjmjtKjnEZuUJn+dpbtKKizLqD05LQuZ9ks3VQwXHeFI/zdk6+kIb\nk9U6ZrdDrKRTYqFyzs
"text/html": [
"\n",
" <iframe\n",
" width=\"800\"\n",
" height=\"600\"\n",
" src=\"https://www.youtube.com/embed/cNxadbrN_aI\"\n",
" frameborder=\"0\"\n",
" allowfullscreen\n",
" ></iframe>\n",
" "
],
"text/plain": [
"<IPython.lib.display.YouTubeVideo at 0x7f8808605e10>"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import YouTubeVideo\n",
"YouTubeVideo('cNxadbrN_aI', width=800, height=600)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img style=\"margin: auto\" width=\"80%\" src=\"http://m.natemat.pl/b94a41cd7322e1b8793e4644e5f82683,641,0,0,0.png\" alt=\"Frank Rosenblatt\"/>"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img style=\"margin: auto\" src=\"http://m.natemat.pl/02943a7dc0f638d786b78cd5c9e75742,641,0,0,0.png\" width=\"70%\" alt=\"Frank Rosenblatt\"/>"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img style=\"margin: auto\" width=\"50%\" src=\"https://upload.wikimedia.org/wikipedia/en/5/52/Mark_I_perceptron.jpeg\" alt=\"perceptron\"/>"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Pierwszy perceptron liniowy\n",
"\n",
"* Frank Rosenblatt, 1957\n",
"* aparat fotograficzny podłączony do 400 fotokomórek (rozdzielczość obrazu: 20 x 20)\n",
"* wagi (potencjometry) aktualizowane za pomocą silniczków"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Uczenie perceptronu\n",
"\n",
"Cykl uczenia perceptronu Rosenblatta:\n",
"\n",
"1. Sfotografuj planszę z kolejnym obiektem.\n",
"1. Zaobserwuj, która lampka zapaliła się na wyjściu.\n",
"1. Sprawdź, czy to jest właściwa lampka.\n",
"1. Wyślij sygnał „nagrody” lub „kary”."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Funkcja aktywacji\n",
"\n",
"Funkcja bipolarna:\n",
"\n",
"$$ g(z) = \\left\\{ \n",
"\\begin{array}{rl}\n",
"1 & \\textrm{gdy $z > \\theta_0$} \\\\\n",
"-1 & \\textrm{wpp.}\n",
"\\end{array}\n",
"\\right. $$\n",
"\n",
"gdzie $z = \\theta_0x_0 + \\ldots + \\theta_nx_n$,<br/>\n",
"$\\theta_0$ to próg aktywacji,<br/>\n",
"$x_0 = 1$. "
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [],
"source": [
"def bipolar_plot():\n",
" \"\"\"Draw the bipolar step activation function with the threshold theta_0 annotated.\"\"\"\n",
" matplotlib.rcParams.update({'font.size': 16})\n",
"\n",
" plt.figure(figsize=(8,5))\n",
" # Step data: output is -1 below the threshold (x = -0.23) and 1 above it\n",
" x = [-1,-.23,1] \n",
" y = [-1, -1, 1]\n",
" plt.ylim(-1.2,1.2)\n",
" plt.xlim(-1.2,1.2)\n",
" # Dashed horizontal guides at the two output levels y = 1 and y = -1\n",
" plt.plot([-2,2],[1,1], color='black', ls=\"dashed\")\n",
" plt.plot([-2,2],[-1,-1], color='black', ls=\"dashed\")\n",
" plt.step(x, y, lw=3)\n",
" # Hide the top/right spines and move the remaining ones so the axes cross at the origin\n",
" ax = plt.gca()\n",
" ax.spines['right'].set_color('none')\n",
" ax.spines['top'].set_color('none')\n",
" ax.xaxis.set_ticks_position('bottom')\n",
" ax.spines['bottom'].set_position(('data',0))\n",
" ax.yaxis.set_ticks_position('left')\n",
" ax.spines['left'].set_position(('data',0))\n",
"\n",
" # Mark the activation threshold theta_0 on the x-axis with an arrow\n",
" plt.annotate(r'$\\theta_0$',\n",
" xy=(-.23,0), xycoords='data',\n",
" xytext=(-50, +50), textcoords='offset points', fontsize=26,\n",
" arrowprops=dict(arrowstyle=\"->\"))\n",
"\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAdMAAAElCAYAAAC/NQipAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XlYlPX+//HXKDBCkEJq7slRsYSK\no+QleEwjLXJtcT1hYouaWRHlUTu4lBqW2sFzyrUFU7sS7CqJXMoMy8qSOqZBmSaQlqZmHksEBe7f\nH32ZX9OoDNwMN8vzcV1c4WeZec/dcL+4l/lgMwxDAACg8hpYXQAAALUdYQoAgEmEKQAAJhGmAACY\nRJgCAGASYQoAgEmEKQAAJhGmAACYRJgCAGASYQrUAjabbZPVNQC4MK8KjmftQcAC3bp1k/j5A6xg\nc2cQR6YAAJhEmAIAYBJhCgCASYQpAAAmEaYAAJhEmAIAYBJhCgCASYQpAAAmEaYAAJhEmAIAYBJh\nCgCASYQpAAAmEaYAAJhEmAIAYBJhCgCASYQp4KZDhw7pwQcfVGRkpPz8/GSz2ZSXl+fW3NLSUiUl\nJal9+/Zq1KiRrr32Wr3++uueLRhAtSFMATft379fqampCgwMVK9evSo0d/r06Zo1a5YmTZqkjRs3\nqkePHho2bJg2bNjgoWoBVCebYRgVGV+hwUBdUlpaqgYNfv/984UXXtB9992n3NxctW/f/qLzjh49\nqrZt22rq1Kl64oknHO033nijjh07pt27d5f73BEREcrKyjJVP4BKsbkziCNTwE1lQVpRmzdv1tmz\nZxUbG+vUHhsbqz179ig3N7cqygNgIcIU8LDs7GzZ7XZ17NjRqT00NFSSlJOTY0VZAKpQhU7z9unT\nx4iLi1NcXJyOHz+uoUOHuoy5//77NWLECB08eFCjR4926X/00Uc1aNAg7d27V+PHj3fpT0xMVN++\nfbVr1y7Fx8e79D/11FOKiorSxx9/rMcff9ylPzk5WeHh4dqyZYvmzJnj0r9s2TJ17txZb731lhYu\nXOjSv2rVKrVt21Zr167VkiVLXPrXrVunpk2bKiUlRSkpKS79GzZskJ+fnxYvXqzU1FSX/szMTEnS\nggULlJGR4dTn6+urjRs3SpJmz56t9957z6n/sssuc9y0Mm3aNH3yySdO/W3atNHq1aslSfHx8dq1\na5dTf0hIiJYvXy5JGjdunL799lun/vDwcCUnJ0v6/ajp0KFDTv2RkZFKSkqSJN1xxx36+eefnfpv\nvPFGTZ8+XZJ0yy236MyZM079AwcO1GOPPSZJ6tOnj/5s+PDhmjhxogoKCtS/f3+X/qp8742ctUIn\n2/SU0dDHZVxNdHhlvFqOSba6DKBatN8x3/G91fu9zMxMTvMCF1KbghRAzccNSKiX2k992+oSKoQj\nU9QnefMGWF3CH7l1ZOrl6SqAmq4yP7gVuZv3lVde0ZgxY7Rv3z6n66YpKSkaO3asDhw4oODg4Is+\nRsSWmcqqWTsYAH/AaV7Aw2JiYuTt7a01a9Y4ta9evVphYWHlBimAmo8jU6AC1q1bJ0n6/PPPJUkb\nN25Us2bN1KxZM/Xu3VuS5OXlpTFjxujFF1+UJDVv3lwJCQlKSkpSQECAunbtqrVr12rr1q1KT0+3\n5oUAqFKEKVABw4YNc/r3xIkTJUm9e/d23KldUlKikpISp3Fz586Vv7+/Fi1apCNHjqhz585KTU3V\nwIEDq6VuAJ5FmAIV4M4Ne+cb07BhQyUmJioxMdETZQGwGNdMAQAwiTAFAMAkwhQAAJMIUwAATCJM\nAQAwiTAFAMAkwhQAAJMIUwAATCJMAQAwiTAFAMAkwhQAAJMIUwAATCJMAQAwiTAFAMAkwhQAAJMI\nUwAATCJMAQAwiTAFAMAkwhQAAJMIUwAATCJM
AQAwiTAFAMAkwhQAAJMIUwAATCJMAQAwiTAFAMAk\nwhQAAJMIUwAATCJMAQAwiTAFAMAkwhQAAJMIUwAATCJMAQAwiTAFAMAkwhQAAJMIUwAATCJMUaVO\nnjyp2bNnKzw8XAEBAQoKClJ0dLQ2bNhgdWkA4DFeVheAuiMzM1N///vfdfjwYaf2999/X5mZmVqy\nZInGjx9vUXUA4DkcmaJKZGZmqn///jp8+LBiY2O1c+dO/fLLL/rss88UGRkpwzCUkJCgQ4cOWV0q\nAFQ5whSm/fTTTxoxYoTOnDmjZ555RqtWrVJERISaNGmi6667TuvXr5e/v78KCgr06quvWl0uAFQ5\nwhSmTZ48WUePHtXAgQM1efJkl/5mzZqpZ8+ekqRt27ZVd3kA4HGEKUz55ptvtGbNGtlsNj3zzDMX\nHNesWTNJUn5+fnWVBgDVhjCFKcuWLVNpaan69u2rq6666oLjzp075/RfAKhLCFNUWmlpqV577TVJ\n0p133nnRsSdOnJAk+fr6erwuAKhuhCkqbdeuXTpy5IgkKS4uTjab7YJf7777riSpbdu2VpZs2sGD\nBzV06FA1btxYl156qW6//XZ9//33bs290LbZtWuXh6sG4Gl8zhSVVpmbiTp16uSBSqpHQUGBoqOj\nZbfbtXLlStlsNiUmJuqGG27Q7t27dckll5T7GHFxcS6ftQ0JCfFUyQCqCWGKSvviiy8kSTfffLPe\nfPPNC47btGmTbrvtNklS165dq6U2T1ixYoUOHDigvXv3qmPHjpKka665Rp06ddKyZcuUkJBQ7mO0\nbt1aPXr08HSpAKoZp3lRad9++60kqV27dmrUqNEFv3bs2OGYc/311zs9xtq1axURESFfX181bdpU\nI0eOVF5eXnW+DLelp6erR48ejiCVpODgYPXs2VPr16+3sDIAViNMUWllqxkFBQVddNzbb78tSbrq\nqqvUrl07R/vSpUs1cuRINWrUSMnJyYqPj9eWLVsUGRlZI1dKys7OVlhYmEt7aGiocnJy3HqMJUuW\nyG63y8/PT9HR0frwww+rukwAFuA0LyrtzJkzkqRGjRpdcMw333yjr776SpI0evRoR/uJEyf0j3/8\nQ127dlVmZqa8vH5/K8bExKh79+5KTExUSkqK54qvhBMnTigwMNClPSgoSL/88ku582NjYzVw4EC1\natVK+fn5mj9/vqKjo/Xuu++qT58+HqgYQHXhyBSV5uPjI0k6ffr0BccsXrxY0u8fiRk7dqyj/c03\n39Svv/6qhx56yBGkkhQREaHrr79eaWlpKiws9FDl1li1apVGjBihXr16KTY2Vtu3b1erVq2UmJh4\n3vHLly9XRESEIiIidOzYsWquFkBFEKaotCuuuEKStHfv3vP25+bmavny5ZKkSZMmqUWLFo6+zz77\nTJIUFRXlMi8qKkoFBQVunzqtLoGBgec9Ar3QEWt5AgICNGDAAO3cufO8/ePGjVNWVpaysrIcK0gB\nqJkIU1Ra7969JUnvvPOOfvzxR6e+06dPa8SIESoqKlJISIhmzpzp1P/DDz9Iktq0aePyuGVtNe26\naWhoqLKzs13ac3Jy1KVLl0o/rs1mM1MWgBqAMEWljR07Vl5eXioqKtLgwYMdf3btnXfeUc+ePbVz\n504FBQUpLS3N5TOYBQUFkiS73e7yuGXXYMvG1BSDBw/Wjh07dODAAUdbXl6ePvroIw0ePLjCj3fq\n1CllZGSoe/fuVVkmAAsQpqi0q666ynHE+fnnn6t79+4KCgrSzTffrC+//FJ/+ctf9N577+maa65x\nmevn5ydJKioqcukru1ZaNqamuO+++9S+fXsNGTJE69evV3p6uoYMGaK2bds6LcSQn58vLy8vPfnk\nk462BQsW6L777tOrr76qzMxMrVy5Uj179tSRI0c0d+5cK14OgCpEmMKUxMRErV27VpGRkbr00ksV\nEBCgbt26
6ZlnntGePXsUHh5+3nmtW7eWdP5TuWVt5zsFbKVLLrlEW7duVUhIiEaPHq0777xTwcHB\n2rp1q/z9/R3jDMNQSUm
"text/plain": [
"<matplotlib.figure.Figure at 0x7f8808605750>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"bipolar_plot()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Perceptron schemat\n",
"\n",
"<img src=\"perceptron.png\" />"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Perceptron zasada działania\n",
"\n",
"1. Ustal wartości początkowe $\\theta$ (wektor 0 lub liczby losowe blisko 0).\n",
"1. Dla każdego przykładu $(x^{(i)}, y^{(i)})$, dla $i=1,\\ldots,m$\n",
" * Oblicz wartość wyjścia $o^{(i)}$:\n",
" $$o^{(i)} = g(\\theta^{T}x^{(i)}) = g(\\sum_{j=0}^{n} \\theta_jx_j^{(i)})$$\n",
" * Wykonaj aktualizację wag (tzw. _perceptron rule_):\n",
" $$ \\theta := \\theta + \\Delta \\theta $$\n",
" $$ \\Delta \\theta = \\alpha(y^{(i)}-o^{(i)})x^{(i)} $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"$$\\theta_j := \\theta_j + \\Delta \\theta_j $$\n",
"\n",
"Jeżeli przykład został sklasyfikowany **poprawnie**:\n",
"\n",
"* $y^{(i)}=1$ oraz $o^{(i)}=1$ : $$\\Delta\\theta_j = \\alpha(1 - 1)x_j^{(i)} = 0$$\n",
"* $y^{(i)}=-1$ oraz $o^{(i)}=-1$ : $$\\Delta\\theta_j = \\alpha(-1 - -1)x_j^{(i)} = 0$$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"Czyli: jeżeli trafiłeś, to nic nie zmieniaj."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"$$\\theta_j := \\theta_j + \\Delta \\theta_j $$\n",
"\n",
"Jeżeli przykład został sklasyfikowany **niepoprawnie**:\n",
"\n",
"* $y^{(i)}=1$ oraz $o^{(i)}=-1$ : $$\\Delta\\theta_j = \\alpha(1 - -1)x_j^{(i)} = 2 \\alpha x_j^{(i)}$$\n",
"* $y^{(i)}=-1$ oraz $o^{(i)}=1$ : $$\\Delta\\theta_j = \\alpha(-1 - 1)x_j^{(i)} = -2 \\alpha x_j^{(i)}$$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"Czyli: przesuń wagi w odpowiednią stronę."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Perceptron zalety i wady\n",
"\n",
"Zalety:\n",
"* intuicyjny i prosty\n",
"* łatwy w implementacji\n",
"* jeżeli dane można liniowo oddzielić, algorytm jest zbieżny w skończonym czasie"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"Wady:\n",
"* jeżeli danych nie można oddzielić liniowo, algorytm nie jest zbieżny"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [],
"source": [
"def plot_perceptron():\n",
" \"\"\"Plot the AND, OR and XOR truth tables as 2D points (green = output 1, red = output 0).\n",
"\n",
" AND and OR are linearly separable, while XOR is not -- which is why a single\n",
" perceptron cannot learn XOR.\n",
" \"\"\"\n",
" plt.figure(figsize=(12,3))\n",
"\n",
" # Left panel: AND -- only the point (1,1) is positive\n",
" plt.subplot(131)\n",
" plt.ylim(-0.2,1.2)\n",
" plt.xlim(-0.2,1.2)\n",
"\n",
" plt.title('AND')\n",
" plt.plot([1,0,0], [0,1,0], 'ro', markersize=10)\n",
" plt.plot([1], [1], 'go', markersize=10)\n",
"\n",
" # NOTE(review): this spine-styling block is repeated verbatim for all three\n",
" # subplots and could be factored into a helper taking an Axes object.\n",
" ax = plt.gca()\n",
" ax.spines['right'].set_color('none')\n",
" ax.spines['top'].set_color('none')\n",
" ax.xaxis.set_ticks_position('none')\n",
" ax.spines['bottom'].set_position(('data',0))\n",
" ax.yaxis.set_ticks_position('none')\n",
" ax.spines['left'].set_position(('data',0))\n",
"\n",
" plt.xticks(np.arange(0, 2, 1.0))\n",
" plt.yticks(np.arange(0, 2, 1.0))\n",
"\n",
"\n",
" # Middle panel: OR -- only the point (0,0) is negative\n",
" plt.subplot(132)\n",
" plt.ylim(-0.2,1.2)\n",
" plt.xlim(-0.2,1.2)\n",
"\n",
" plt.plot([1,0,1], [0,1,1], 'go', markersize=10)\n",
" plt.plot([0], [0], 'ro', markersize=10)\n",
"\n",
" ax = plt.gca()\n",
" ax.spines['right'].set_color('none')\n",
" ax.spines['top'].set_color('none')\n",
" ax.xaxis.set_ticks_position('none')\n",
" ax.spines['bottom'].set_position(('data',0))\n",
" ax.yaxis.set_ticks_position('none')\n",
" ax.spines['left'].set_position(('data',0))\n",
"\n",
" plt.title('OR')\n",
" plt.xticks(np.arange(0, 2, 1.0))\n",
" plt.yticks(np.arange(0, 2, 1.0))\n",
"\n",
"\n",
" # Right panel: XOR -- positives and negatives lie on opposite diagonals,\n",
" # so no single straight line separates the two classes\n",
" plt.subplot(133)\n",
" plt.ylim(-0.2,1.2)\n",
" plt.xlim(-0.2,1.2)\n",
"\n",
" plt.title('XOR')\n",
" plt.plot([1,0], [0,1], 'go', markersize=10)\n",
" plt.plot([0,1], [0,1], 'ro', markersize=10)\n",
"\n",
" ax = plt.gca()\n",
" ax.spines['right'].set_color('none')\n",
" ax.spines['top'].set_color('none')\n",
" ax.xaxis.set_ticks_position('none')\n",
" ax.spines['bottom'].set_position(('data',0))\n",
" ax.yaxis.set_ticks_position('none')\n",
" ax.spines['left'].set_position(('data',0))\n",
"\n",
" plt.xticks(np.arange(0, 2, 1.0))\n",
" plt.yticks(np.arange(0, 2, 1.0))\n",
"\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArMAAADJCAYAAAAwwbqVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAFkxJREFUeJzt3X9w1Hedx/HXO7ggIbc0tFx7tmfh\nAH8AB9bGG7BSbKt3MNdmOhj8MQdqO9Z2MnJeyOjoHfXsDHPlqhxU53rl8AcYblS6dYSxRY/rSVUs\no6TVOEEqBFKg2iqlV0hi2y187o/PF9hsNsl+k3x395M8HzPf2fL9fva7n/32886+9vv97vdrzjkB\nAAAAIaoqdwcAAACAoSLMAgAAIFiEWQAAAASLMAsAAIBgEWYBAAAQLMIsAAAAgkWYBQAAQLAIs2Vg\nZg+bmTOzgwO0cdH0pJlZgeV10fItefO35DzXmVnWzE6Z2a+iZTeb2bgE3hYwppjZe80sY2bPmtkr\nZnbSzH5oZh83s9cVaN+ZV5tnzewPZrbLzP6mHO8BGC3MrDmqq3/vZ/lfmFmXmR03s8l5y95iZpvM\n7LCZ/dHMTkefvZ83s0v6Wd+2vHo+Z2YvmdkTZnaXmZGvSsi4aUJpmdllkn4r6XWSTNK7nHN7C7TL\n/R/zIefct/KW10n6uaStzrmP5szfIukjkjZHr1MlKS3prZIWSZoo6WeSPuCc6xyp9wWMFVFQ3STp\ndkldkr4n6aikSyUtlfTnkvZLutk593zO8zolvUHSv0SzJkiaLelm+Tpd6ZzbVpp3AYwuUXj8saSF\nkm5yzv0wZ5lJ2iPpeklLnHM/yFnWKOlL0T93S2qTr83Fkt4m6feSbnHO/Szv9bZJ+jv5vwXPydfw\n1ZKWSaqRtMk5d9eIv1EU5pxjKuEkqUmSk/TF6PGr/bRzkn4nqVvSIUmvy1teF7XZkjd/SzS/rsA6\nL5XUEi0/KKmm3NuDiSm0SdK/RTX0hKQr8pZNkPQfOctTOcs6JXUVWN/7o/bPlPu9MTGFPEl6k6Qe\n+S+XNTnz/yGqsc157ZflfNZeW2B9d0h6TdJJSVflLdsWPfdtefNnRZ/b5yRdXe5tMlYmdoOX3m2S\nzkj6nKR2Se83s0n9tH1B0v2SZsoX1bA4516Q9GFJ/yPpzZI+Mdx1AmOJmb1Z0ifla7PeOfdc7nLn\n3CuSGiX9SNIC+aMkg3lIfg/vG81s6sj2GBg7nHO/kbRG0jRJX5AkM3uT/NGQY5Kaz7c1s5T856vk\nj1S2FljfZkn3ye8IWltkHw5J+on8kde3D/GtICbCbAmZ2Tsk/aWk7zjneuT3ktbI75npz79KOiXp\nc2ZWPdw+OP/V8fxhzoFeF0BfH5H/u/mfzrk/FGqQV2O3xVx/dhh9AyBtlPRTSXea2V9L2ip/et3H\nnHOnc9q9V9JVkn7snPvRAOv7oqRXJX3QzF4fsy/Uc4kQZkvr9uixJXr8L/lDEbcXbi45516StE7S\nFfKHSkbCT+UPncwv9EMVAP1aGD0+Nki7H8nX2DuK+MHlcvkvte3Ouf8bZv+AMc05d07+S+TLknbK\nHyHZ5Jzbnde0qFp2zp2S9Av5U4gG3dNqZrMkvUvSK/LnzqMECDIlEn2j+6CkZyX9UJKccyfMbI+k\nG81sVnR4opAvS/p7SZ82swej4hoy59wrZvaCpMslTZE/wR3A4K6IHk8M1Mg598ecGrtUF2tsvJl9\n/vx/y/8A7Bb5c+waR7y3wBjknPuNmW2VdJf8kc1PFWhWVC1Hjkv6q5zn5LrLzM7/AOyNkt4nqVpS\nU/5pSEgOYbZ03ifpEvkT0M/lzG+RdKP83tnPFnqic+5lM7tH/goFn1XhwgRQ+VKS/jlvXo/8L6x/\nUob+AKOOmV0tf6UBye+wuV7SIwm93J0F5n3S
OfelAvOREE4zKJ3zpxLkX3rnYfkPsw8Pcjjy6/JX\nIPiEmV01nI6Y2QT5vUVn5b+1AijO+T0tA9ZgdCTmUvlz7V7IWdTtnDPnnEmaLOkD8qcaPTzcugZw\n4TJcX5X0J/Kn5v1R0qb8a8uqyFrOa/O7Asuuiep5ovzlLw9KWh+dr4sSIcyWgJlNk3RD9M9f5l5o\nWdJp+UMSb5C0pL91OOfOyv9K8/WSPj/MLr1Tfq/8L51zrw1zXcBY8kT0eNMg7a6Xr7GfR7Xbh3Pu\ntHNuu6SPSfpTSQUv9g4glrvk63OLc+5+Sf8k6UpJG/LaFVXLZlYrf73ZVyQ91V8759zL0dGVm+V3\nFH3VzCYO6R0gNsJsadwmf5mOx+W/MeZP34na9ftDMElyzj0sf8ODj0p6y1A6En1r/cfon98eyjqA\nMewb8teWvMPMLi3UIKqx86cMfX2wFTrnvi1pn6R6M3vnSHUUGGuiHUf3yd8wqCmafb+kvZJuy7vT\n3m7537AsMrN3DbDaZvkff33TOffyYH1wznXI/87lKkmrYr4FDBFhNmHRXUk+In8ocYVz7mP5k/wl\nsn4n6ZboDmED+aykcRrC3lkzmyJ/mZL3SHpa0gNx1wGMZc65g/J3C7pM0g4zuzx3uZmNl/8ge7d8\nQP1Gkau+J+8RQAzRl8ivyV8Z5OPnrwwS/UbldvnTDTabWTqan9XFwLvdzK4psM7bJX1G/lShu2N0\n5wvypw9+ysxqhvaOEAc/AEveTfK3uPuBc67gryadc2fNrEXSpyWtVN/DIblt/9fM/lvSYOfjfNzM\nbpbfI5yW35O7WL1vZ9sV980A0Kck1crfgOSQmeXfzvaNklol3Rp9YA7KOfd9M/uZpPeY2SLn3I+T\n6TowajXKn863xTnX68de0dUN7pa/ZuwXFP1oyzn3kJl9Uv6ufj+PPlvb5K808m5J1+ji7WyLuerB\n+df7vZltkg/LqyTdO8z3hkGYv743kmJm35S/JNeHnHPfGqDdWyT9WtKvnHPzovNp251zcwu0fbv8\n9etM0lbn3Edzlm1R77sOnZW/49gJ+Q/YjKRH866oACCm6JDlnfLXsbxM/i5ebZK+Kelr+UHWzDol\nXeacK7inxsz+VtL3JO1xzt1QqA2AvsxsuqRfSXpJ0pxC12uOjpL+RP76su9xzj2Ws2y2fPC8Uf78\n2qykw/LXqd3onHuxwPq2yV8x4Rrn3C8KLL9C/ktuj6Rpzrkzw32f6B9hFgAAAMHinFkAAAAEizAL\nAACAYBFmAQAAECzCLAAAAIIVN8y6Uk5Lliwp6esxMQ1hqmQl3RbUK1MAUyUr6bagXpkCmIpW0Xtm\nT548We4uACgS9QqEg3rFaFLRYRYAAAAYCGEWAAAAwSLMAgAAIFiEWQAAAASLMAsAAIBgEWYBAAAQ\nLMIsAAAAgkWYBQAAQLAIswAAAAgWYRYAAADBIswCAAAgWIRZAAAABIswCwAAgGARZgEAABAswiwA\nAACCRZgFAABAsBINsydOnNCqVau0cOFCVVdXy8zU2dmZ5EsCGAZqFggH9Qp4iYbZw4cPa/v27aqt\nrdWiRYuKe1JHh9TYKKXTUmurf2xs9POBAjpOdajxkUal702r6p4qpe9Nq/GRRnWcYszEFbdmc7d9\n629b2fYYFPU6cqhXJC43k1VVVWwmM+dcnPaxGp87d05VVT4vf+UrX9Edd9yho0ePatq0aYWfsGuX\n1NAgZbNSNqs6SfslKZXyUyYjLV0apwsY5XYd2qWGhxqUPZtV9lz2wvxUVUqpcSlllme0dFaiY8aS\nXPkwxapXKV7N9tn2myTdWdJtj8BQrwOiXlFZ8jLZBaXLZEXXa6J7Zs8XWVE6OvxG6+npvdEk/++e\nHr+8wr4NoHw6TnWo4aEG9WR7en0wSlL2XFY92R41PNTAXocYiq1Ztj3iYsyMPOoViQksk1XOD8DW\nr++7wfJl
s9KGDaXpDyre+ifWK3t24DGTPZvVhn2MmZHGtkdcjJnyYdsjtsAyWeWE2W3bittwLS2l\n6Q8q3ra2bX32MuTLnsu
"text/plain": [
"<matplotlib.figure.Figure at 0x7f880643ba90>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot_perceptron()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Funkcje aktywacji\n",
"\n",
"Zamiast funkcji bipolarnej możemy zastosować funkcję sigmoidalną jako funkcję aktywacji."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [],
"source": [
"def plot_activation_functions():\n",
" \"\"\"Show the bipolar step activation (left) next to a smooth tanh activation (right).\n",
"\n",
" Both panels mark the same activation threshold theta_0 = -0.23 with an arrow,\n",
" illustrating the sigmoidal function as a differentiable replacement for the step.\n",
" \"\"\"\n",
" plt.figure(figsize=(16,7))\n",
" # Left panel: bipolar step function, -1 below the threshold and 1 above it\n",
" plt.subplot(121)\n",
" x = [-2,-.23,2] \n",
" y = [-1, -1, 1]\n",
" plt.ylim(-1.2,1.2)\n",
" plt.xlim(-2.2,2.2)\n",
" # Dashed guides at the asymptotic output levels y = 1 and y = -1\n",
" plt.plot([-2,2],[1,1], color='black', ls=\"dashed\")\n",
" plt.plot([-2,2],[-1,-1], color='black', ls=\"dashed\")\n",
" plt.step(x, y, lw=3)\n",
" # Center the axes at the origin and hide the top/right spines\n",
" ax = plt.gca()\n",
" ax.spines['right'].set_color('none')\n",
" ax.spines['top'].set_color('none')\n",
" ax.xaxis.set_ticks_position('bottom')\n",
" ax.spines['bottom'].set_position(('data',0))\n",
" ax.yaxis.set_ticks_position('left')\n",
" ax.spines['left'].set_position(('data',0))\n",
"\n",
" plt.annotate(r'$\\theta_0$',\n",
" xy=(-.23,0), xycoords='data',\n",
" xytext=(-50, +50), textcoords='offset points', fontsize=26,\n",
" arrowprops=dict(arrowstyle=\"->\"))\n",
"\n",
" # Right panel: tanh shifted by +0.23 so it crosses zero at the same threshold\n",
" plt.subplot(122)\n",
" x2 = np.linspace(-2,2,100)\n",
" y2 = np.tanh(x2+ 0.23)\n",
" plt.ylim(-1.2,1.2)\n",
" plt.xlim(-2.2,2.2)\n",
" plt.plot([-2,2],[1,1], color='black', ls=\"dashed\")\n",
" plt.plot([-2,2],[-1,-1], color='black', ls=\"dashed\")\n",
" plt.plot(x2, y2, lw=3)\n",
" ax = plt.gca()\n",
" ax.spines['right'].set_color('none')\n",
" ax.spines['top'].set_color('none')\n",
" ax.xaxis.set_ticks_position('bottom')\n",
" ax.spines['bottom'].set_position(('data',0))\n",
" ax.yaxis.set_ticks_position('left')\n",
" ax.spines['left'].set_position(('data',0))\n",
"\n",
" plt.annotate(r'$\\theta_0$',\n",
" xy=(-.23,0), xycoords='data',\n",
" xytext=(-50, +50), textcoords='offset points', fontsize=26,\n",
" arrowprops=dict(arrowstyle=\"->\"))\n",
"\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA5IAAAGRCAYAAAAXeoyVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3XlYlPXex/HPALKjgOCSKwpu4I7m\ncsxSK4/rqaz0pE/aomZppi1Wbsc0ray0U25tltVJs0yzNFMz23zMXcFdUdEQFFFkh7mfPzyO8aAC\nMnAPM+/XdXHp73cv8wVHvny4N4thGAIAAAAAoKjczC4AAAAAAFC+ECQBAAAAAMVCkAQAAAAAFAtB\nEgAAAABQLARJAAAAAECxECQBAAAAAMVCkAQAAAAAFAtBEgAAAABQLARJAAAAAECxeBRzfaNUqgCc\nTPfu3bV69WqzywDKA4vZBTgBejNQBPRmoMiK1Js5IgmUgjNnzphdAgAA+At6M2BfBEkAAAAAQLEQ\nJAEAAAAAxUKQBAAAAAAUC0ESAAAAAFAsBEkAAAAAQLEQJAEAAAAAxUKQBAAAAAAUC0ESAAAAAFAs\nBEkAAAAAQLEQJAEAAAAAxUKQBAAAAAAUC0ESAAAAAFAsBEkAAAAAQLEQJAEAAAAAxUKQBAAAAAAU\nC0ESAAAAAFAsBEkAAAAAQLEQJAEAAAAAxUKQBAAAAAAUC0ESAAAAAFAsBEkAAAAAQLEQJAEAAAAA\nxUKQBAAAAAAUC0ESAAAAAFAsBEk4rfj4eI0cOVLt27eXr6+vLBaL4uLiirSt1WrV9OnTVbduXXl7\ne6t58+b68ssvS7dgAACcHL0ZcB4ESTitQ4cOacmSJQoKClKnTp2Kte2ECRM0efJkPfHEE1q1apXa\ntWune++9V999910pVQsAgPOjNwPOw2IYRnHWL9bKgJmsVqvc3C79ruS9997To48+qqNHj6pu3brX\n3S4xMVG1atXSuHHj9K9//cs237VrVyUlJWnXrl2FvnZ0dLS2bNlSovoBF2ExuwAnQG9GuUFvBsqF\nIvVmjkjCaV1uVMX1/fffKzs7WwMHDsw3P3DgQO3evVtHjx61R3kAALgcejPgPEw5InnrrbcWmLvv\nvvs0YsQIpaenq0ePHgWWDx48WIMHD9aZM2fUr1+/Assfe+wx3X///Tpx4oQGDRpUYPnYsWPVu3dv\n7d+/X8OGDSuwfPz48erWrZt27Nih0aNHF1j+8ssvq0OHDvrtt9/0wgsvFFg+a9YstWjRQmvXrtXU\nqVMLLJ8/f74aNmyob775Rq+//nqB5YsWLVKtWrW0ePFizZ07t8DypUuXKiQkRAsXLtTChQsLLP/u\nu+/k6+urOXPmaMmSJQWWb9iwQZI0c+ZMrVy5Mt8yHx8frVq1SpL00ksvad26dfmWV65c2XYNwvPP\nP6/ff/893/KaNWvqk08+kSSNHj1aO3bsyLe8QYMGWrBggSRp6NChOnDgQL7lLVq00KxZsyRdagjx\n8fH5lrdv317Tp0+XJN1zzz06e/ZsvuVdu3bVhAkTJEl///vflZGRkW95r169FBgYqEcffVQ333yz\nvL29JUnnq0crpWZHGe6esrc/Pxqt6g/Osvt+AUcSN6OnPXbDEcmSozfTm52mN1/2/997R44cUXx8\nvG655RZJV957a9as0Z133qmoqChVrlzZtv3V3ntbt25V69atJfHec/X33tNPPy3Jub7vXf43tZMi\n9WYPe74iUJ6UVogEAAD2lZubKw+Pgj+2BgYG2pZfzaeffqqtW7dKknJyckqvQMAFcY0kXMLVrsOo\nO+7bUns9jkjCFXBE0mHQm1EuFecayaFDh2rFihVKSEjIN3/o0CFFRETo448/vurRn7/iGkmgyDgi\nCRTVX38gfu655zR79mxlZGTIYrny/2jz5s26
+eabtXLlSvXsef0foKPXTtIW+/yQDQCAywsKClJK\nSooMw8jXm5OTkyVJwcHBZpUGlLnsXKuS07J1Ni1L59JydDYtS8lp2WpeK1CtageVWR0ESeD/iYyM\nVFZWlg4fPqzw8HDbfGxsrCSpSZMmZpUGAIBLojfD2RmGofMZOTp9IUunL2Qq4UKmklKz8n2cuXjp\n40Lm1U/lHtUlnCAJmKl79+6qUKGCPv30U02aNMk2/8knnygqKkphYWEmVgcAgOuhN6O8y8zJ08mU\nDJ08l2H781RKhv48n6k/z1/6MyvXWqLXOJuWbadqi4YgCae2dOlSSbJdaL9q1SqFhoYqNDQ033oP\nP/yw3n//fUlSlSpVNGbMGE2fPl0BAQFq1aqVFi9erPXr12vFihVl+wkAAOBkrtebO3fuLEny8PDQ\ngw8+SG9GuZKSnq0jZ9J0/Gy64s6m6djZdB1PTteJ5HQlpmbZ7XXcLFKwn5cq+3kq+C8fN9cr21O8\nCZJwavfee2++8YgRIyTpUqNq94xtPi8vL99606ZNk7+/v2bPnq2EhAQ1bNhQS5YsUa9evUq/aAAA\nnNj1evPlRxjk5eXRm+GQrFZD8ecydDAxVYcSL+pQ4kUdOZOmI0kXdS69ZHcG9vN0V9VK3qoa4K1q\nlbwVGuClKgFeCg3wUqi/l0ICvBTi76VAnwpyczP/XnXctRUu6693bbXT3SdtuDMcUGTmd8Lyj94M\nFAG9GcWVkp6t2FMXFPvnBe1PSNX+06k6cDpVmTnFPwXVzSJVr+SjmkE+qhHko5qBProp0EfVA31U\nvZK3qlfyVoB3hVL4LG4Id20FAAAAgMIkp2VrZ3yKdsef1+6T5xV76oJOpmQUax/eFdxUt7Kf6lb2\nU50QX9Wt7Kfawb6qFeSr6oHequDuVkrVm4MgCQAAAMBlZOdaFXPqvLYfT9G24+e040SK4s8VPTRW\n9vNUeBV/RVT1V3iov+pX8Ve9UH9Vr+jtEKeclhWCJAAAAACndTErV1uPndMfR5O1OS5ZO0+kFOkO\nqZ7ubmpYLUCNqweoUbWKalQtQA2rBaiyv1cZVO34CJIAAAAAnEZmTp62Hjun3w6f0e+Hz2pn/Hnl\nWa9/Obmnh5uaVK+o5jUrqWnNQEXVqKj6of5OdzqqPREkAQAAAJRbhmHoUOJF/XQgSRsPntH/Hjlb\n6BHHOpV91ap2kFrWDlTLWkFqVD2A0FhMBEkAAAAA5UpWbp7+90iy1u9L1Lp9p3Ui+drXOFosUqNq\nFXVzWLDa1A1Wm7pBqlLRuwyrdU4ESQAAAAAOLy0rVxv2J2l1TIJ+3Jeoi1m511y3Xoif/hYRog71\nK+vmsMoK8vMsw0pdA0ESAAAAgENKz87Vur2J+mbnKW04kKTsa5yy6u/lob+Fh+iWBqG6pUGIagb5\nlnGlrocgCQAAAMBh5OZZtfFgkr7efkpr955WenbeVderU9lXXRtVVdfGVdSmbrA8PbjGsSwRJAEA\nAACYLvbUBX21LV5f7zilMxezrrpOo2oB6h5VTd2jqqlh1QBZLK7z3EZHQ5AEAAAAYIq0rFx9s/OU\n/rP5uHbGn7/qOuFV/NWn+U3q1ay66oX6l3GFuBaCJAAAAIAytT8hVR/9Hqfl208q7SqnrlYJ8NJd\nLWuob4saalydI4+OiCAJAAAAoNTl5lm1dm+iFv52VJuOJBdY7unupjujqqlf65rqWL+yPHiuo0Mj\nSAIAAAAoNenZuVryxwm9/+vRqz7vMbyKvwa0ra27W9bgMR3lCEESAAAAgN2dvZilj36L08ebjikl\nPSffMnc3i+6MrKoH29dV27BgTl0thwiSAAAAAOwmMTVT7248ok82HVdGTv7rHwN9K+iBm2vrgZvr\n6KZAH5MqhD0QJAEAAACUWGJqpub8eFj/2XxcWbnWfMtqB/vqkU5h6te6pnw9iSDOgH9FAAAAADfs\nfHqO5m08
rA9/ParMnPwBsnH1inr8tvrqHlmNm+c4GYIkAAAAgGLLzMnT+78c1byfDis1MzffsqY1\nKmlU1wh1a1yF6x+dFEE
"text/plain": [
"<matplotlib.figure.Figure at 0x7f88064ca690>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot_activation_functions()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Perceptron a regresja liniowa"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img src=\"reglin.png\" />"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Uczenie regresji liniowej:\n",
"* Model: $$h_{\\theta}(x) = \\sum_{i=0}^n \\theta_ix_i$$\n",
"* Funkcja kosztu (błąd średniokwadratowy): $$J(\\theta) = \\frac{1}{m} \\sum_{i=1}^{m} (h_{\\theta}(x^{(i)}) - y^{(i)})^2$$\n",
"\n",
"* Po obliczeniu $\\nabla J(\\theta)$, zwykły SGD."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Perceptron a dwuklasowa regresja logistyczna"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"reglog.png\" />"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Uczenie dwuklasowej regresji logistycznej:\n",
"* Model: $$h_{\\theta}(x) = \\sigma(\\sum_{i=0}^n \\theta_ix_i) = P(1|x,\\theta)$$\n",
"* Funkcja kosztu (entropia krzyżowa): $$\\begin{eqnarray} J(\\theta) &=& -\\frac{1}{m} \\sum_{i=1}^{m} [y^{(i)}\\log P(1|x^{(i)},\\theta) \\\\ && + (1-y^{(i)})\\log(1-P(1|x^{(i)},\\theta))]\\end{eqnarray}$$\n",
"\n",
"* Po obliczeniu $\\nabla J(\\theta)$, zwykły SGD."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Perceptron a wieloklasowa regresja logistyczna"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img src=\"multireglog.png\" />"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Wieloklasowa regresja logistyczna\n",
"* Model (dla $c$ klasyfikatorów binarnych): \n",
"$$\\begin{eqnarray}\n",
"h_{(\\theta^{(1)},\\dots,\\theta^{(c)})}(x) &=& \\mathrm{softmax}(\\sum_{i=0}^n \\theta_{i}^{(1)}x_i, \\ldots, \\sum_{i=0}^n \\theta_i^{(c)}x_i) \\\\ \n",
"&=& \\left[ P(k|x,\\theta^{(1)},\\dots,\\theta^{(c)}) \\right]_{k=1,\\dots,c} \n",
"\\end{eqnarray}$$\n",
"* Funkcja kosztu (**przyjmując model regresji binarnej**): $$\\begin{eqnarray} J(\\theta^{(k)}) &=& -\\frac{1}{m} \\sum_{i=1}^{m} [y^{(i)}\\log P(k|x^{(i)},\\theta^{(k)}) \\\\ && + (1-y^{(i)})\\log P(\\neg k|x^{(i)},\\theta^{(k)})]\\end{eqnarray}$$\n",
"\n",
"* Po obliczeniu $\\nabla J(\\theta)$, **c-krotne** uruchomienie SGD, zastosowanie $\\mathrm{softmax}(X)$ do niezależnie uzyskanych klasyfikatorów binarnych."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Przyjmijmy: \n",
"$$ \\Theta = (\\theta^{(1)},\\dots,\\theta^{(c)}) $$\n",
"\n",
"$$h_{\\Theta}(x) = \\left[ P(k|x,\\Theta) \\right]_{k=1,\\dots,c}$$\n",
"\n",
"$$\\delta(x,y) = \\left\\{\\begin{array}{cl} 1 & \\textrm{gdy } x=y \\\\ 0 & \\textrm{wpp.}\\end{array}\\right.$$\n",
"\n",
"* Wieloklasowa funkcja kosztu $J(\\Theta)$ (kategorialna entropia krzyżowa):\n",
"$$ J(\\Theta) = -\\frac{1}{m}\\sum_{i=1}^{m}\\sum_{k=1}^{c} \\delta({y^{(i)},k}) \\log P(k|x^{(i)},\\Theta) $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Gradient $\\nabla J(\\Theta)$:\n",
"$$ \\dfrac{\\partial J(\\Theta)}{\\partial \\Theta_{j,k}} = -\\frac{1}{m}\\sum_{i = 1}^{m} (\\delta({y^{(i)},k}) - P(k|x^{(i)}, \\Theta)) x^{(i)}_j \n",
"$$\n",
"\n",
"* Liczymy wszystkie wagi jednym uruchomieniem SGD"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"## Podsumowanie\n",
"\n",
"* W przypadku jednowarstwowej sieci neuronowej wystarczy znać gradient funkcji kosztu.\n",
"* Wtedy liczymy tak samo jak w przypadku regresji liniowej, logistycznej, wieloklasowej logistycznej itp.\n",
" * Wymienione modele to szczególne przypadki jednowarstwowych sieci neuronowych.\n",
"* Regresja liniowa i binarna regresja logistyczna to jeden neuron.\n",
"* Wieloklasowa regresja logistyczna to tyle neuronów ile klas."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"Funkcja aktywacji i funkcja kosztu są **dobierane do problemu**."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## 5.2. Wielowarstwowe sieci neuronowe\n",
"\n",
"czyli _Artificial Neural Networks_ (ANN) lub _Multi-Layer Perceptrons_ (MLP)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img src=\"nn1.png\" />"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Architektura sieci\n",
"\n",
"* Sieć neuronowa jako graf neuronów. \n",
"* Organizacja sieci przez warstwy.\n",
"* Najczęściej stosowane są sieci jednokierunkowe i gęste."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* $n$-warstwowa sieć neuronowa ma $n+1$ warstw (nie liczymy wejścia).\n",
"* Rozmiary sieci określane poprzez liczbę neuronów lub parametrów."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Sieć neuronowa jednokierunkowa (_feedforward_)\n",
"\n",
"* Mając daną $n$-warstwową sieć neuronową oraz jej parametry $\\Theta^{(1)}, \\ldots, \\Theta^{(L)} $ oraz $\\beta^{(1)}, \\ldots, \\beta^{(L)} $ liczymy:<br/><br/> \n",
"$$a^{(l)} = g^{(l)}\\left( a^{(l-1)} \\Theta^{(l)} + \\beta^{(l)} \\right). $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img src=\"nn2.png\" />"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Funkcje $g^{(l)}$ to tzw. **funkcje aktywacji**.<br/>\n",
    "Dla $l = 0$ przyjmujemy $a^{(0)} = \\mathrm{x}$ (wektor wierszowy cech) oraz $g^{(0)}(x) = x$ (identyczność)."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
    "* Parametry $\\Theta$ to wagi na połączeniach między neuronami dwóch warstw.<br/>\n",
"Rozmiar macierzy $\\Theta^{(l)}$, czyli macierzy wag na połączeniach warstw $a^{(l-1)}$ i $a^{(l)}$, to $\\dim(a^{(l-1)}) \\times \\dim(a^{(l)})$."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* Parametry $\\beta$ zastępują tutaj dodawanie kolumny z jedynkami do macierzy cech.<br/>Macierz $\\beta^{(l)}$ ma rozmiar równy liczbie neuronów w odpowiedniej warstwie, czyli $1 \\times \\dim(a^{(l)})$."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* **Klasyfikacja**: dla ostatniej warstwy $L$ (o rozmiarze równym liczbie klas) przyjmuje się $g^{(L)}(x) = \\mathop{\\mathrm{softmax}}(x)$.\n",
"* **Regresja**: pojedynczy neuron wyjściowy jak na obrazku. Funkcją aktywacji może wtedy być np. funkcja identycznościowa."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
    "* Pozostałe funkcje aktywacji najczęściej mają postać sigmoidy, np. sigmoidalna, tangens hiperboliczny.\n",
"* Mogą mieć też inny kształt, np. ReLU, leaky ReLU, maxout."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
    "## 5.3. Metoda propagacji wstecznej – wprowadzenie"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
    "### Jak uczyć sieci neuronowe?"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* W poznanych do tej pory algorytmach (regresja liniowa, regresja logistyczna) do uczenia używaliśmy funkcji kosztu, jej gradientu oraz algorytmu gradientu prostego (GD/SGD)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
    "* Dla sieci neuronowych potrzebowalibyśmy również znaleźć gradient funkcji kosztu."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* Co sprowadza się do bardziej ogólnego problemu:<br/>jak obliczyć gradient $\\nabla f(x)$ dla danej funkcji $f$ i wektora wejściowego $x$?"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Pochodna funkcji\n",
"\n",
"* **Pochodna** mierzy, jak szybko zmienia się wartość funkcji względem zmiany jej argumentów:\n",
"\n",
"$$ \\frac{d f(x)}{d x} = \\lim_{h \\to 0} \\frac{ f(x + h) - f(x) }{ h } $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Pochodna cząstkowa i gradient\n",
"\n",
"* **Pochodna cząstkowa** mierzy, jak szybko zmienia się wartość funkcji względem zmiany jej *pojedynczego argumentu*."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* **Gradient** to wektor pochodnych cząstkowych:\n",
"\n",
"$$ \\nabla f = \\left( \\frac{\\partial f}{\\partial x_1}, \\ldots, \\frac{\\partial f}{\\partial x_n} \\right) $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
    "### Gradient – przykłady\n",
"\n",
"$$ f(x_1, x_2) = x_1 + x_2 \\qquad \\to \\qquad \\frac{\\partial f}{\\partial x_1} = 1, \\quad \\frac{\\partial f}{\\partial x_2} = 1, \\quad \\nabla f = (1, 1) $$ "
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"$$ f(x_1, x_2) = x_1 \\cdot x_2 \\qquad \\to \\qquad \\frac{\\partial f}{\\partial x_1} = x_2, \\quad \\frac{\\partial f}{\\partial x_2} = x_1, \\quad \\nabla f = (x_2, x_1) $$ "
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
    "$$ f(x_1, x_2) = \\max(x_1, x_2) \\hskip{12em} \\\\\n",
    "\\to \\qquad \\frac{\\partial f}{\\partial x_1} = \\mathbb{1}_{x_1 \\geq x_2}, \\quad \\frac{\\partial f}{\\partial x_2} = \\mathbb{1}_{x_2 \\geq x_1}, \\quad \\nabla f = (\\mathbb{1}_{x_1 \\geq x_2}, \\mathbb{1}_{x_2 \\geq x_1}) $$ "
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Własności pochodnych cząstkowych\n",
"\n",
    "Jeżeli $f(x, y, z) = (x + y) \\, z$ oraz $x + y = q$, to:\n",
"$$f = q z,\n",
"\\quad \\frac{\\partial f}{\\partial q} = z,\n",
"\\quad \\frac{\\partial f}{\\partial z} = q,\n",
"\\quad \\frac{\\partial q}{\\partial x} = 1,\n",
"\\quad \\frac{\\partial q}{\\partial y} = 1 $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Reguła łańcuchowa\n",
"\n",
"$$ \\frac{\\partial f}{\\partial x} = \\frac{\\partial f}{\\partial q} \\, \\frac{\\partial q}{\\partial x},\n",
"\\quad \\frac{\\partial f}{\\partial y} = \\frac{\\partial f}{\\partial q} \\, \\frac{\\partial q}{\\partial y} $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
    "### Propagacja wsteczna – prosty przykład"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"source": [
"# Dla ustalonego wejścia\n",
"x = -2; y = 5; z = -4"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(3, -12)\n"
]
}
],
"source": [
"# Krok w przód\n",
"q = x + y\n",
"f = q * z\n",
"print(q, f)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-4, -4, 3]\n"
]
}
],
"source": [
"# Propagacja wsteczna dla f = q * z\n",
"dz = q\n",
"dq = z\n",
"# Propagacja wsteczna dla q = x + y\n",
"dx = 1 * dq # z reguły łańcuchowej\n",
"dy = 1 * dq # z reguły łańcuchowej\n",
"print([dx, dy, dz])"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img src=\"exp1.png\" />"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Właśnie tak wygląda obliczanie pochodnych metodą propagacji wstecznej!"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* Spróbujmy czegoś bardziej skomplikowanego:<br/>metodą propagacji wstecznej obliczmy pochodną funkcji sigmoidalnej."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
    "### Propagacja wsteczna – funkcja sigmoidalna"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"Funkcja sigmoidalna:\n",
"\n",
"$$f(\\theta,x) = \\frac{1}{1+e^{-(\\theta_0 x_0 + \\theta_1 x_1 + \\theta_2)}}$$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img src=\"exp2.png\" />"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0.3932238664829637, -0.5898357997244456]\n",
"[-0.19661193324148185, -0.3932238664829637, 0.19661193324148185]\n"
]
}
],
"source": [
"import math\n",
"\n",
"# Losowe wagi i dane\n",
"w = [2,-3,-3]\n",
"x = [-1, -2]\n",
"\n",
"# Krok w przód\n",
"dot = w[0]*x[0] + w[1]*x[1] + w[2]\n",
"f = 1.0 / (1 + math.exp(-dot)) # funkcja sigmoidalna\n",
"\n",
"# Krok w tył\n",
"ddot = (1 - f) * f # pochodna funkcji sigmoidalnej\n",
"dx = [w[0] * ddot, w[1] * ddot]\n",
"dw = [x[0] * ddot, x[1] * ddot, 1.0 * ddot]\n",
"\n",
"print(dx)\n",
"print(dw)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
    "### Obliczanie gradientów – podsumowanie\n",
"\n",
"* Gradient $f$ dla $x$ mówi jak zmieni się całe wyrażenie przy zmianie wartości $x$.\n",
"* Gradienty łączymy korzystając z **reguły łańcuchowej**.\n",
"* W kroku wstecz gradienty informują, które części grafu powinny być zwiększone lub zmniejszone (i z jaką siłą), aby zwiększyć wartość na wyjściu.\n",
"* W kontekście implementacji chcemy dzielić funkcję $f$ na części, dla których można łatwo obliczyć gradienty."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## 5.4. Uczenie wielowarstwowych sieci neuronowych metodą propagacji wstecznej"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"Mając algorytm SGD oraz gradienty wszystkich wag, moglibyśmy trenować każdą sieć."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Niech:\n",
"$$\\Theta = (\\Theta^{(1)},\\Theta^{(2)},\\Theta^{(3)},\\beta^{(1)},\\beta^{(2)},\\beta^{(3)})$$\n",
"\n",
"* Funkcja sieci neuronowej z grafiki:\n",
"\n",
"$$\\small h_\\Theta(x) = \\tanh(\\tanh(\\tanh(x\\Theta^{(1)}+\\beta^{(1)})\\Theta^{(2)} + \\beta^{(2)})\\Theta^{(3)} + \\beta^{(3)})$$\n",
"* Funkcja kosztu dla regresji:\n",
"$$J(\\Theta) = \\dfrac{1}{2m} \\sum_{i=1}^{m} (h_\\Theta(x^{(i)})- y^{(i)})^2 $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Jak obliczymy gradienty?\n",
"\n",
"$$\\nabla_{\\Theta^{(l)}} J(\\Theta) = ? \\quad \\nabla_{\\beta^{(l)}} J(\\Theta) = ?$$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### W kierunku propagacji wstecznej\n",
"\n",
"* Pewna (niewielka) zmiana wagi $\\Delta z^l_j$ dla $j$-ego neuronu w warstwie $l$ pociąga za sobą (niewielką) zmianę kosztu: \n",
"\n",
"$$\\frac{\\partial J(\\Theta)}{\\partial z^{l}_j} \\Delta z^{l}_j$$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Jeżeli $\\frac{\\partial J(\\Theta)}{\\partial z^{l}_j}$ jest duża, $\\Delta z^l_j$ ze znakiem przeciwnym zredukuje koszt.\n",
"* Jeżeli $\\frac{\\partial J(\\Theta)}{\\partial z^l_j}$ jest bliska zeru, koszt nie będzie mocno poprawiony."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Definiujemy błąd $\\delta^l_j$ neuronu $j$ w warstwie $l$: \n",
"\n",
"$$\\delta^l_j \\equiv \\dfrac{\\partial J(\\Theta)}{\\partial z^l_j}$$ \n",
"$$\\delta^l \\equiv \\nabla_{z^l} J(\\Theta) \\textrm{ (zapis wektorowy)} $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Podstawowe równania propagacji wstecznej\n",
"\n",
"$$\n",
"\\begin{array}{ccll}\n",
"\\delta^L & = & \\nabla_{a^L}J(\\Theta) \\odot {(g^{L})}^{\\prime}(z^L) & (BP1) \\\\[2mm]\n",
    "\\delta^{l} & = & (\\delta^{l+1} (\\Theta^{l+1})^T) \\odot {{(g^{l})}^{\\prime}}(z^{l}) & (BP2)\\\\[2mm]\n",
"\\nabla_{\\beta^l} J(\\Theta) & = & \\delta^l & (BP3)\\\\[2mm]\n",
    "\\nabla_{\\Theta^l} J(\\Theta) & = & (a^{l-1})^T \\delta^l & (BP4)\\\\\n",
"\\end{array}\n",
"$$\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Algorytm propagacji wstecznej"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"Dla jednego przykładu (x,y):\n",
"\n",
"1. **Wejście**: Ustaw aktywacje w warstwie cech $a^{(0)}=x$ \n",
"2. **Feedforward:** dla $l=1,\\dots,L$ oblicz \n",
"$$z^{(l)} = a^{(l-1)} \\Theta^{(l)} + \\beta^{(l)} \\textrm{ oraz } a^{(l)}=g^{(l)}(z^{(l)})$$\n",
"3. **Błąd wyjścia $\\delta^{(L)}$:** oblicz wektor $$\\delta^{(L)}= \\nabla_{a^{(L)}}J(\\Theta) \\odot {g^{\\prime}}^{(L)}(z^{(L)})$$\n",
"4. **Propagacja wsteczna błędu:** dla $l = L-1,L-2,\\dots,1$ oblicz $$\\delta^{(l)} = \\delta^{(l+1)}(\\Theta^{(l+1)})^T \\odot {g^{\\prime}}^{(l)}(z^{(l)})$$\n",
"5. **Gradienty:** \n",
" * $\\dfrac{\\partial}{\\partial \\Theta_{ij}^{(l)}} J(\\Theta) = a_i^{(l-1)}\\delta_j^{(l)} \\textrm{ oraz } \\dfrac{\\partial}{\\partial \\beta_{j}^{(l)}} J(\\Theta) = \\delta_j^{(l)}$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"W naszym przykładzie:\n",
"\n",
"$$\\small J(\\Theta) = \\frac{1}{2}(a^{(L)} - y)^2 $$\n",
"$$\\small \\dfrac{\\partial}{\\partial a^{(L)}} J(\\Theta) = a^{(L)} - y$$\n",
"\n",
"$$\\small \\tanh^{\\prime}(x) = 1 - \\tanh^2(x)$$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"<img src=\"nn3.png\" />"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Algorytm SGD z propagacją wsteczną\n",
"\n",
"Pojedyncza iteracja:\n",
"* Dla parametrów $\\Theta = (\\Theta^{(1)},\\ldots,\\Theta^{(L)})$ utwórz pomocnicze macierze zerowe $\\Delta = (\\Delta^{(1)},\\ldots,\\Delta^{(L)})$ o takich samych wymiarach (dla uproszczenia opuszczono wagi $\\beta$).\n",
"* Dla $m$ przykładów we wsadzie (_batch_), $i = 1,\\ldots,m$:\n",
    "    * Wykonaj algorytm propagacji wstecznej dla przykładu $(x^{(i)}, y^{(i)})$ i przechowaj gradienty $\\nabla_{\\Theta}J^{(i)}(\\Theta)$ dla tego przykładu;\n",
" * $\\Delta := \\Delta + \\dfrac{1}{m}\\nabla_{\\Theta}J^{(i)}(\\Theta)$\n",
"* Wykonaj aktualizację wag: $\\Theta := \\Theta - \\alpha \\Delta$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
    "### Propagacja wsteczna – podsumowanie\n",
"\n",
"* Algorytm pierwszy raz wprowadzony w latach 70. XX w.\n",
"* W 1986 David Rumelhart, Geoffrey Hinton i Ronald Williams pokazali, że jest znacznie szybszy od wcześniejszych metod.\n",
"* Obecnie najpopularniejszy algorytm uczenia sieci neuronowych."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## 5.5. Implementacja sieci neuronowych"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>łod.dł.</th>\n",
" <th>łod.sz.</th>\n",
" <th>pł.dł.</th>\n",
" <th>pł.sz.</th>\n",
" <th>Iris setosa?</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.2</td>\n",
" <td>3.4</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>5.1</td>\n",
" <td>3.7</td>\n",
" <td>1.5</td>\n",
" <td>0.4</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>6.7</td>\n",
" <td>3.1</td>\n",
" <td>5.6</td>\n",
" <td>2.4</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>6.5</td>\n",
" <td>3.2</td>\n",
" <td>5.1</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>4.9</td>\n",
" <td>2.5</td>\n",
" <td>4.5</td>\n",
" <td>1.7</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>6.0</td>\n",
" <td>2.7</td>\n",
" <td>5.1</td>\n",
" <td>1.6</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" łod.dł. łod.sz. pł.dł. pł.sz. Iris setosa?\n",
"0 5.2 3.4 1.4 0.2 1.0\n",
"1 5.1 3.7 1.5 0.4 1.0\n",
"2 6.7 3.1 5.6 2.4 0.0\n",
"3 6.5 3.2 5.1 2.0 0.0\n",
"4 4.9 2.5 4.5 1.7 0.0\n",
"5 6.0 2.7 5.1 1.6 0.0"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas\n",
"src_cols = ['łod.dł.', 'łod.sz.', 'pł.dł.', 'pł.sz.', 'Gatunek']\n",
"trg_cols = ['łod.dł.', 'łod.sz.', 'pł.dł.', 'pł.sz.', 'Iris setosa?']\n",
"data = (\n",
" pandas.read_csv('iris.csv', usecols=src_cols)\n",
" .apply(lambda x: [x[0], x[1], x[2], x[3], 1 if x[4] == 'Iris-setosa' else 0], axis=1))\n",
"data.columns = trg_cols\n",
"data[:6]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 1. 5.2 3.4 1.4 0.2]\n",
" [ 1. 5.1 3.7 1.5 0.4]\n",
" [ 1. 6.7 3.1 5.6 2.4]\n",
" [ 1. 6.5 3.2 5.1 2. ]\n",
" [ 1. 4.9 2.5 4.5 1.7]\n",
" [ 1. 6. 2.7 5.1 1.6]]\n",
"[[ 1.]\n",
" [ 1.]\n",
" [ 0.]\n",
" [ 0.]\n",
" [ 0.]\n",
" [ 0.]]\n"
]
}
],
"source": [
"m, n_plus_1 = data.values.shape\n",
"n = n_plus_1 - 1\n",
"Xn = data.values[:, 0:n].reshape(m, n)\n",
"X = np.matrix(np.concatenate((np.ones((m, 1)), Xn), axis=1)).reshape(m, n_plus_1)\n",
"Y = np.matrix(data.values[:, n]).reshape(m, 1)\n",
"\n",
"print(X[:6])\n",
"print(Y[:6])"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/10\n",
"150/150 [==============================] - 0s - loss: 2.0678 - acc: 0.6667 \n",
"Epoch 2/10\n",
"150/150 [==============================] - 0s - loss: 1.9711 - acc: 0.6667 \n",
"Epoch 3/10\n",
"150/150 [==============================] - 0s - loss: 1.8811 - acc: 0.6667 \n",
"Epoch 4/10\n",
"150/150 [==============================] - 0s - loss: 1.7793 - acc: 0.6667 \n",
"Epoch 5/10\n",
"150/150 [==============================] - 0s - loss: 1.6948 - acc: 0.6667 \n",
"Epoch 6/10\n",
"150/150 [==============================] - 0s - loss: 1.5993 - acc: 0.6667 \n",
"Epoch 7/10\n",
"150/150 [==============================] - 0s - loss: 1.5162 - acc: 0.6667 \n",
"Epoch 8/10\n",
"150/150 [==============================] - 0s - loss: 1.4308 - acc: 0.6667 \n",
"Epoch 9/10\n",
"150/150 [==============================] - 0s - loss: 1.3487 - acc: 0.6667 \n",
"Epoch 10/10\n",
"150/150 [==============================] - 0s - loss: 1.2676 - acc: 0.6667 \n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7f87f40aa150>"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from keras.models import Sequential\n",
"from keras.layers import Dense\n",
"\n",
"model = Sequential()\n",
"model.add(Dense(3, input_dim=5))\n",
"model.add(Dense(3))\n",
"model.add(Dense(1, activation='sigmoid'))\n",
"\n",
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
"\n",
"model.fit(X, Y)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"0.8209257125854492"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.predict(np.array([1.0, 3.0, 1.0, 2.0, 4.0]).reshape(-1, 5)).tolist()[0][0]"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## 5.6. Funkcje aktywacji"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"* Każda funkcja aktywacji ma swoje zalety i wady.\n",
"* Różne rodzaje funkcji aktywacji nadają się do różnych zastosowań."
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [],
"source": [
"import math\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import random\n",
"\n",
"import keras\n",
"from keras.datasets import mnist\n",
"from keras.models import Sequential\n",
"from keras.layers import Dense, Dropout, SimpleRNN, LSTM\n",
"from keras.optimizers import Adagrad, Adam, RMSprop, SGD\n",
"\n",
"from IPython.display import YouTubeVideo"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [],
"source": [
"def plot(fun):\n",
" x = np.arange(-3.0, 3.0, 0.01)\n",
" y = [fun(x_i) for x_i in x]\n",
" fig = plt.figure(figsize=(14, 7))\n",
" ax = fig.add_subplot(111)\n",
" fig.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9)\n",
" ax.set_xlim(-3.0, 3.0)\n",
" ax.set_ylim(-1.5, 1.5)\n",
" ax.grid()\n",
" ax.plot(x, y)\n",
" plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Funkcja logistyczna"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"$$ g(x) = \\frac{1}{1 + e^{-x}} $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* Przyjmuje wartości z przedziału $(0, 1)$."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
    "#### Funkcja logistyczna – wykres"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAG9CAYAAACRcQ4FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XmUnOld2Pvvr/d9qVa39m5JI41m\npNlHs4B9bXmwPXZibIgXILZjE+MBEgiG41zswAlmyQVuiOEe35DDEILZTojNvcEYfA0YW4MDnn3x\njMazat+l3vf1uX9UdUvqac20Rl2qt7u/n3PqVNVb71vz6Ph1q7963/d5I6WEJEmSJKm0yko9AEmS\nJEmScSZJkiRJmWCcSZIkSVIGGGeSJEmSlAHGmSRJkiRlgHEmSZIkSRlQtDiLiE0R8bmI+FZEjERE\niogti9z2UGH9+Y/vK9Z4JUmSJKmUKor43duBDwCPAd8E3n6Z2/818Jl5y56/8mFJkiRJUvYUM87+\nPqW0FiAifoTLj7NzKaUHl35YkiRJkpQ9RTutMaU0U6zvliRJkqSVJssTgnxv4Vq18Yh40OvNJEmS\nJK1kWY2zLwM/CdwLfBAYA/5nRHyopKOSJEmSpCKJlFLx/yP5a85+F9iaUjr0OrYvBx4E1qWUNl9i\nnfuA+wBqampu7+zsfP0D1oo0MzNDWVlW/z1CpeS+oYW4X+hS3De0EPcLLeSFF144l1JqX+z6xZwQ\nZMmklKYj4ovAr0fE+pTSyQXWuR+4H2Dnzp3p+eed2FEX27dvH3v37i31MJRB7htaiPuFLsV9Qwtx\nv9BCIuLw5ay/HPO++If6JEmSJOkqWxZxFhEVwA8AR1JKp0o9HkmSJElaakU9rTEi3ld4eXvh+Z0R\ncRY4m1J6oLDOFPAHKaWPFd7/EPAe4CvAUWAt8K+B24AfKuZ4JUmSJKlUin3N2Rfnvf/twvMDwN7C\n6/LCY9ZBoAP4j0AOGAYeBd6RUvrroo1UkiRJkkqoqHGWUorLXSel9CBwT9EGJUmSJEkZtCyuOZMk\nSZKklc44kyRJkqQMMM4kSZIkKQOMM0mSJEnKAONMkiRJkjLAOJMkSZKkDDDOJEmSJCkDjDNJkiRJ\nygDjTJIkSZIywDiTJEmSpAwwziRJkiQpA4wzSZIkScoA40ySJEmSMsA4kyRJkqQMMM4kSZIkKQOM\nM0mSJEnKAONMkiRJkjLAOJMkSZKkDDDOJEmSJCkDjDNJkiRJygDjTJIkSZIywDiTJEmSpAwwziRJ\nkiQpA4wzSZIkScoA40ySJEmSMsA4kyRJkqQMMM4kSZIkKQOMM0mSJEnKAONMkiRJkjLAOJMkSZKk\nDDDOJEmSJCkDjDNJkiRJygDjTJIkSZIywDiTJEmSpAwwziRJkiQpA4wzSZIkScoA40ySJEmSMsA4\nkyRJkqQMMM4kSZIkKQOMM0mSJEnKAONMkiRJkjLAOJMkSZKkDDDOJEmSJCkDjDNJkiRJygDjTJIk\nSZIywDiTJEmSpAwwziRJkiQpA4wzSZIkScoA40ySJEmSMsA4kyRJkqQMMM4kSZIkKQOMM0mSJEnK\nAONMkiRJkjLAOJMkSZKkDChanEXEpoj4XER8KyJGIiJFxJZFblsWEZ+OiEMRMRYRT0XEe4s1VkmS\nJEkqtWIeOdsOfADoBb55mdv+MvAZ4P8G3gk8CHwxIv7JUg5QkiRJkrKioojf/fcppbUAEfEjwNsX\ns1FEdACfBH4tpfQbhcXfiIjtwK8BXynGYCVJkiSplIp25CylNPM6N70XqAL+eN7yPwZujIitVzQw\nSZIkScqgLE4IshsYB16at3x/4XnX1R2OJEmSJBVfFuMsB/SllNK85T0XfC5JkiRJK0oxrzm7qiLi\nPuA+gPb2dvbt21faASlzhoaG3C+0IPcNLcT9
QpfivqGFuF9oKWQxznqBloiIeUfPZo+Y9SywDSml\n+4H7AXbu3Jn27t1b1EFq+dm3bx/uF1qI+4YW4n6hS3Hf0ELcL7QUsnha436gGrhm3vLZa82evbrD\nkSRJkqTiy2KcfRWYBD44b/mHgGdSSgev/pAkSZIkqbiKelpjRLyv8PL2wvM7I+IscDal9EBhnSng\nD1JKHwNIKZ2JiM8Cn46IQeBx4AeAe4B3F3O8kiRJklQqxb7m7Ivz3v924fkBYG/hdXnhcaGfA4aA\nnwLWAc8DH0gp/WVxhilJkiRJr256JjEyMcXw+DTDE1OMjE8zND6VXzYxzfD4FMPjU4xMTFNdcfkn\nKRY1zlJK8XrWSSlNA79SeEiSJEnS6zIzkxiemGJofIqhsSkGC8/D4+dfD43nH4Ozr8cm596PzEbX\nxBRjkzOL/u9uztVe9lizOFujJEmSJDE9kxgam2JgbJL+0UkGRicZGJtkYHSK/tFJBscmXyWwzi9b\njNrKchpqKmisrqChpoKG6go25+poqK6grqqc+uoK6qsqqK8up67wXF9VQV3hub46v05dVX79yvIy\n4mcv789rnEmSJEkqipQSwxPTr4iq2ff511OFzwrvx6byn4/mw+vVREBD9cVB1VRbycaWWhouWHbR\n63kB1lhdSX11ORXlpZ8r0TiTJEmS9JrGp6bpG5mkd2SCvpFJ+kYm6C287y889xaW59ebpH90gsnp\n9Krf21BdQVNNPqpmw+r69Y0011bSVJNfln9dWKemkua6/Pv6qgrKyl7zSqplwziTJEmSVpmJqRl6\nhifoHh6ne2iCnuEJzg2Nn4+v0UJ8DRdia3SSkYnpS35fVUUZrXWVtNZV0VJXyfaOBlrqKmmpq6Jl\nNq7mIquSptoKmmoqaaypyMQRq6wwziRJkqRlbmJqht6RfGD1DE/QPTRB9/AEPYX46h6eoHv2s+EJ\nBscWPl2wLMgHVV0lLbWVrG+u4fr1TbTWVc7FVmtdFa11+aNXrYX3tVXzJ1/X62GcSZIkSRk0NjnN\nuaFxzg4WHkPjnBnIP58bHC/EVz7ILhVb5WVBrr6Ktvoq2hqquLG1Jf+6vopcw+zy6rl1mmoqV9Rp\ngsuNcSZJkiRdJTMzid6RCc5eEF1nBi8IsLkIG2NggeCKgFxdFWsaqmlrqOKGjc350CrEl7G1vBln\nkiRJ0hVKKdE/OsnpgTFO9Y9x6oLn0/1jcwF2bmicqZlXTpBRW1lOR1M17Q3VXLu2gTdc00Z7YzXt\njdV0NNbMvc7VV1HpNVorlnEmSZIkvYqp6RnODo3nY2s2vArRdWpgjNMD4xzvHWHir//mFdvm6qtY\n21TD2qZqrl/fmI+shmraC8HVUYiu+mp/LZdxJkmSpFUspcS5oQlO9I1yom+U432jnOgby7/vH+VU\n/xjnhsaZf7CrsjxY21TDuqYadm1o4tr6cfbs3sHa5vyydU01dDRVU1PpRBlaPONMkiRJK9bY5PQF\n0TXK8dnwmn30jzExNXPRNnVV5WxoqWV9cw3XrWtkXVPNXHStbaphXXMNubqqi67l2rdvH3vftO1q\n//G0whhnkiRJWraGx6c42jvC0Z5RjvSMcKx3pBBe+QjrHp64aP0IWNtYw4aWGm7Y2My9u9exoaW2\n8KhhY0stzbWVRDiJhq4+40ySJEmZNTE1w4m+0bkAyz8XHr2j9MyLr7qqcja15mPrxk3NbCxE14bm\n/LJ1zTVOqKHMMs4kSZJUMiklzg6Oc6RnhKO9IxzpPh9gx3pHOdk/etH1XhVlwcbWWjpzddy7oZnN\nuVo2t9bRmatjc66O1jqPemn5Ms4kSZJUVCklTg+Mc6h7mEPnhjnUPcLh7mEOnhvmcPcIo5PTF63f\n0VhNZ66OO7fm2Nxay6bc+fha11RDufft0gplnEmSJOmKzcwkTg+OzQXXbIjNvh6bPD/pRmV5sDlX\nx5a2er7r
mja2tNXT2VbH5tY6NrXWOsOhVi3jTJIkSYvWNzLBy2eHePnsMC+fHeLg2XyAHe65OMCq\nysvYnKtlS1s9b9i+hi1
"text/plain": [
"<matplotlib.figure.Figure at 0x7f8808515990>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot(lambda x: 1 / (1 + math.exp(-x)))"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Tangens hiperboliczny"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"$$ g(x) = \\tanh x = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}} $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* Przyjmuje wartości z przedziału $(-1, 1)$.\n",
"* Powstaje z funkcji logistycznej przez przeskalowanie i przesunięcie."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
    "#### Tangens hiperboliczny – wykres"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAG9CAYAAACRcQ4FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xd8W/W9//H3V7LkPRM7znI2zh5k\nshM2ZZWy9w4tpXTclu7bUri/Cx20vW25lwTCCJAWKFBWKaMYAmRAErL33rbjeFvW+v7+kDJxwEms\nnGP59Xw89JB0dI78SXsa59Wjc2SstQIAAAAAOMvj9AAAAAAAAOIMAAAAAFyBOAMAAAAAFyDOAAAA\nAMAFiDMAAAAAcAHiDAAAAABcIGFxZozpYYz5kzFmljGm0RhjjTG9W7nthvj6B9++mqh5AQAAAMBJ\nKQl87/6SrpA0T9JMSWcf5vb/kvTLg5atPPqxAAAAAMB9EhlnH1hru0iSMeY2HX6cVVprZ7f9WAAA\nAADgPgn7WKO1Npqo9wYAAACAZOPmC4JcGD9XrdkYM5vzzQAAAAAkM7fG2auSviXpHEnXSgpIeskY\nc52jUwEAAABAghhrbeJ/SOycs6mS+lhrNxzB9l5JsyUVW2t7HmKdyZImS1JaWtrokpKSIx8YSSka\njcrjcev/HwEnsW+gJewXOBT2DbSE/QItWbVqVaW1trC16yfygiBtxlobMcY8L+lBY0xXa+32FtaZ\nImmKJJWWltqVK7mwIw5UVlamiRMnOj0GXIh9Ay1hv8ChsG+gJewXaIkxZuPhrN8e8z7xh/oAAAAA\n4BhrF3FmjEmRdKWkTdbaHU7PAwAAAABtLaEfazTGXBZ/ODp+f54xpkJShbX2/fg6YUlPWmtvjT+/\nWtLFkt6QtFlSF0nflHS8pKsTOS8AAAAAOCXR55w9f9Dzh+P370uaGH/sjd/2WC+pSNJvJBVIapD0\nqaRzrbX/StikAAAAAOCghMaZtdYc7jrW2tmSTk/YUAAAAADgQu3inDMAAAAASHbEGQAAAAC4AHEG\nAAAAAC5AnAEAAACACxBnAAAAAOACxBkAAAAAuABxBgAAAAAuQJwBAAAAgAsQZwAAAADgAsQZAAAA\nALgAcQYAAAAALkCcAQAAAIALEGcAAAAA4ALEGQAAAAC4AHEGAAAAAC5AnAEAAACACxBnAAAAAOAC\nxBkAAAAAuABxBgAAAAAuQJwBAAAAgAsQZwAAAADgAsQZAAAAALgAcQYAAAAALkCcAQAAAIALEGcA\nAAAA4ALEGQAAAAC4AHEGAAAAAC5AnAEAAACACxBnAAAAAOACxBkAAAAAuABxBgAAAAAuQJwBAAAA\ngAsQZwAAAADgAsQZAAAAALgAcQYAAAAALkCcAQAAAIALEGcAAAAA4ALEGQAAAAC4AHEGAAAAAC5A\nnAEAAACACxBnAAAAAOACxBkAAAAAuABxBgAAAAAuQJwBAAAAgAsQZwAAAADgAsQZAAAAALgAcQYA\nAAAALkCcAQAAAIALEGcAAAAA4ALEGQAAAAC4AHEGAAAAAC5AnAEAAACACxBnAAAAAOACxBkAAAAA\nuEDC4swY08MY8ydjzCxjTKMxxhpjerdyW48x5sfGmA3GmIAxZqEx5tJEzQoAAAAATkvkkbP+kq6Q\ntFvSzMPc9j5Jv5T0Z0nnSZot6XljzFfackAAAAAAcIuUBL73B9baLpJkjLlN0tmt2cgYUyTp+5Ie\nsNb+Nr74PWNMf0kPSHojEcMCAAAAgJMSduTMWhs9wk3PkeSX9PRBy5+WNMwY0+eoBgMAAAAAF3Lj\nBUGGSGqWtOag5Uvj94OP7TgAAAAAkHiJ/FjjkSqQVG2ttQctr9rvdQAAAAA4KtGoVcRaRaJW0T33\nUX1u2QGvW6tIVHsfhw9+Pf6efu/hHwdzY5wd
EWPMZEmTJamwsFBlZWXODgTXqa+vZ79Ai9g30BL2\nCxwK+wZa0lH3i1icKHazUji63/P441BUilgp9CWvReKvRWwsjqJWsQiy+5ZFooot33OL2n3PD1o3\nesDzWIiFrQ5434OPBrWlTmnmsLdxY5ztlpRnjDEHHT3bc8SsqoVtZK2dImmKJJWWltqJEycmdEi0\nP2VlZWK/QEvYN9AS9gscCvsGWuKW/SIYjqoxGFZjMKLGYFhNwaiaQhEF9tzCUQVCETWHIgqEovFl\n+z0ORRUIx15v2n+d+ONgJKpgOKpQ/D4cbdu88RgpxetRisfEbi099hp5vR75vEZ+j5HP45E3vvzA\n9fbf1iglvp7Pa+T1xLb3mNjrHo+R12PkNfHHRvLuWW723e+/zOtRbPv4+xywvccoLcWr4fce3p/f\njXG2VFKqpH468LyzPeeaLTvmEwEAAAAJEI5EVRcIqzYQOuC+PhBWYzCshmBEjc3x+2BYDc0H3R/0\neihy+LEUCwmP0nxepfm8SvV5lJbiVZovtiw33Rd7nBJ7ze/1yOf1yJ8Su/m8HqWm7Fu29zWvR/4U\nI7/XGwup/db1H7T+nu29nsM/2pRM3Bhnb0oKSbpW0v6teZ2kJdba9Y5MBQAAABzEWqv65rAqGqNa\ntKVauxtDqm4MqroxpLq9wbUvuvYua4rdN4Uirfo5mX6vMlJTYvf+FGWmepWX4Vf3/PjzFl5P96co\n3bcvsvYPrlSfJ/6aV74jODcKiZHQODPGXBZ/ODp+f54xpkJShbX2/fg6YUlPWmtvlSRrbbkx5iFJ\nPzbG1EmaL+lKSadLuiiR8wIAAKBja2gOq7K+OX4LandDcG9w7W7c//G+CNv70b4PPvrc+6WmeJSd\n5lNOWoqy01KUk+5T19w0Zaf6lJ2WEnstPXafvWedNJ+yUlOUmRqLrLQUrzwd/IhSR5HoI2fPH/T8\n4fj9+5Imxh9747f9/VRSvaRvSyqWtFLSFdba1xIzJgAAAJJVUzCi8rqAKuubVVEXVEV9syrrmuPP\n94VYZX2zGoMtH8nyez3Ky/ApP8OvvAyf+hdmKT/Tp7wMv/IzfNq5aZ0mHD9cBfFluemx2EpNOfif\nucChJTTOrLVfmvgtrWOtjUi6P34DAAAAPsdaq9pAWDtrA9peE9COmqb4fez5nuU1TaEWty/I9Ktz\nll+ds1I1qiRPnbNS4ze/CrNjj/cEWYbfK2MO/U/bsrLNmji4S6L+qOgg3HjOGQAAACBrrSrqm7W5\nqklbdjdqy+4mba6K3W+radKOmsDnjnQZI3XOSlVxTpp6FmRoXJ8CdclJU5ectL0hVpSdqoJMv1I4\n1wouQ5wBAADAMYFQRBt3NWp9Zb027mrU5t2NB8RYczh6wPqds/zqkZ+hQcU5mlRapOKcNBXnpqlr\nbuy+KDtN/hSiC+0TcQYAAICECkei2lrdpHWVDVpf0aD1lftu22qatP832+am+9QjP10DirJ1+sAi\n9SzIUI/8dPXMz1CP/Ayl+zmHC8mLOAMAAECbCIajWl/ZoFU76/be1pTXa1NV4wHfv5WdlqK+nTM1\ntne++nTuqT6FmerTKVMlnTKUm+5z8E8AOIs4AwAAwGGJRq027GrQyh11WrWzfm+Ira9s2HtZeY+R\nenfO1ICiLJ09pFh9Omeqb+dM9e6cqU6Z/i+8uAbQURFnAAAAOKRQJKrVO+u1ZFuNlm2r1dL4fUP8\nQhzGSCUFGRpQlK2zBndRaXG2BhRlq29hptJ8fAQROBzEGQAAACTFQmzF9jp9tqVaS7fWaMm2Gq3a\nUa9gJHZRjgy/V4O75uiy0T00pFuuBnaNhRjngQFtgzgDAADooHbUBLRg024t2FytBZt2a9GWmr1X\nR8zP8GlIt1zdfFJvDemeqyHdctS7U6a8Hj6OCCQKcQYAANABBMNRLd5ao3kbq7RgU7U+21yt7TUB\nSZI/xaOh
3XJ03YReGlWSp5E989Q9L53zwoBjjDgDAABIQoFQRJ9trtacdVWau2GX5m3crUAodlSs\nJP7lzKN65mlUSb4Gdc3
"text/plain": [
"<matplotlib.figure.Figure at 0x7f87f1c714d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot(lambda x: math.tanh(x))"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
    "### ReLU (_Rectified Linear Unit_)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"$$ g(x) = \\max(0, x) $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"#### ReLU zalety\n",
"* Mniej podatna na problem zanikającego gradientu (_vanishing gradient_) niż funkcje sigmoidalne, dzięki czemu SGD jest szybciej zbieżna.\n",
"* Prostsze obliczanie gradientu.\n",
"* Dzięki zerowaniu ujemnych wartości, wygasza neurony, „rozrzedzając” sieć (_sparsity_), co przyspiesza obliczenia."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"#### ReLU wady\n",
"* Dla dużych wartości gradient może „eksplodować”.\n",
"* „Wygaszanie” neuronów."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"#### ReLU wykres"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAG9CAYAAACRcQ4FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAHy9JREFUeJzt3XuUrXdd3/HPNzmQcLFK9BBcSm4E\njyTeKrELq9VD5NpioosAVcBohSPWC8iiFYSlUVFI1UgXlS4SLyC4xEAVCKRYFCZa4RhuShNKQmgS\nkzZZBBMgJwm5cH79Y++RyWTPycyZ/cz+7b1fr7Vmzcyzn2efb+CXybzPs59nV2stAAAAzNYRsx4A\nAAAAcQYAANAFcQYAANABcQYAANABcQYAANABcQYAANCBweKsqr6+ql5bVR+sqturqlXVCZs89prx\n/us/fmCoeQEAAGZp14DPfXKSZyb5SJK/TvKkLR7/50nOWbftiu2PBQAA0J8h4+yvWmvHJklVPS9b\nj7PPttb2T38sAACA/gz2ssbW2sGhnhsAAGDR9HxDkO8fX6t2Z1Xtd70ZAACwyHqNs4uS/EySJyd5\ndpIvJvmzqnrOTKcCAAAYSLXWhv9DRtecXZDkxNbaNYdx/JFJ9id5RGvtkRvssy/JviQ5+uijH3vc\ncccd/sAspIMHD+aII3r9+whmydpgEuuCjSz72rj7YHLDbQdTSR7xkCPygOX9n+Jeln1dMNmVV175\n2dba7s3uP+QNQaamtfalqnprknOr6mtbazdM2Of8JOcnyZ49e9oVV7ixI/e2srKSvXv3znoMOmRt\nMIl1wUaWeW1cceOt+aEL9mf3kZW37PvOnPg1D5n1SN1Y5nXBxqrq2q3sP495P/ypPgAA7mU1zB4g\nzGAwcxFnVbUrybOS/ENr7cZZzwMAsEyEGeyMQV/WWFVnjb987PjzU6vqpiQ3tdYuGe9zT5I3ttZ+\nfPz9DyU5M8nFSa5LcmySn0ry7Ul+aMh5AQC4N2EGO2foa87euu77140/X5Jk7/jrI8cfq65O8vAk\nv5HkmCS3Jflwkqe01v58sEkBALgXYQY7a9A4a63VVvdpre1PcvpgQwEAcL+EGey8ubjmDACAnSPM\nYDbEGQAA/0SYweyIMwAAkggzmDVxBgCAMIMOiDMAgCUnzKAP4gwAYIkJM+iHOAMAWFLCDPoizgAA\nlpAwg/6IMwCAJSPMoE/iDABgiQgz6Jc4AwBYEsIM+ibOAACWgDCD/okzAIAFJ8xgPogzAIAFJsxg\nfogzAIAFJcxgvogzAIAFJMxg/ogzAIAFI8xgPokzAIAFIsxgfokzAIAFIcxgvokzAIAFIMxg/okz\nAIA5J8xgMYgzAIA5JsxgcYgzAIA5JcxgsYgzAIA5JMxg8YgzAIA5I8xgMYkzAIA5IsxgcYkzAIA5\nIcxgsYkzAIA5IMxg8YkzAIDOCTNYDuIMAKBjwgyWhzgDAOiUMIPlIs4AADokzGD5iDMAgM4IM1hO\n4gwAoCPCDJaXOAMA6IQwg+UmzgAAOiDMAHEGADBjwgxIxBkAwEwJM2CVOAMAmBFhBqwlzgAAZkCY\nAeuJMwCAHSbMgEnEGQDADhJmwEbEGQDADhFmwKGIMwCAHSDMgPsjzgAABibMgM0QZwAAAxJmwGaJ\nMwCAgQgzYCvEGQDAAIQZsFXiDABgyoQZcDjEGQDAFAkz4HCJMwCAKRFmwHaIMwCAKRBmwHaJMwCA\nbRJmwDSIMwCAbRBmwLQMFmdV9fVV9dqq+mBV3V5VrapO2OSxR1TVy6rqmqr6YlX9fVU9fahZAQAO\nhzADpmnIM2cnJ3lmkluS/PUWj/3VJOck+S9Jnppkf5K3VtW/nuaAAACHS5gB07ZrwOf+q9basUlS\nVc9L8qTNHFRVD0/ykiSvbq395njz+6vq5CSv
TnLxEMMCAGyWMAOGMNiZs9bawcM89MlJHpjkzeu2\nvznJN1fVidsaDABgG66/9aAwAwbR4w1BTk1yZ5Kr1m2/fPz5lJ0dBwBg5Iobb825l94hzIBB9Bhn\nxyT5XGutrdt+85rHAQB21OpLGY88QpgBwxjymrMdVVX7kuxLkt27d2dlZWW2A9GdAwcOWBdMZG0w\niXXBWtffejDnXnpHjjyi8rPfdDDXXvahXDvroeiKnxlMQ49xdkuSr6qqWnf2bPWM2c0Tjklr7fwk\n5yfJnj172t69ewcdkvmzsrIS64JJrA0msS5YdcWNt+bFF+zPgx90VN6y7ztz7WUfsja4Dz8zmIYe\nX9Z4eZKjkjxq3fbVa80+sbPjAADLyl0ZgZ3UY5y9J8ndSZ69bvtzklzWWrt650cCAJaNMAN22qAv\na6yqs8ZfPnb8+alVdVOSm1prl4z3uSfJG1trP54krbXPVNV5SV5WVbcm+WiSZyU5PckZQ84LAJAI\nM2A2hr7m7K3rvn/d+PMlSfaOvz5y/LHWy5McSPLCJI9IckWSZ7bW3jXMmAAAI8IMmJVB46y1Voez\nT2vtS0leOf4AANgRwgyYpR6vOQMA2HHCDJg1cQYALD1hBvRAnAEAS02YAb0QZwDA0hJmQE/EGQCw\nlIQZ0BtxBgAsHWEG9EicAQBLRZgBvRJnAMDSEGZAz8QZALAUhBnQO3EGACw8YQbMA3EGACw0YQbM\nC3EGACwsYQbME3EGACwkYQbMG3EGACwcYQbMI3EGACwUYQbMK3EGACwMYQbMM3EGACwEYQbMO3EG\nAMw9YQYsAnEGAMw1YQYsCnEGAMwtYQYsEnEGAMwlYQYsGnEGAMwdYQYsInEGAMwVYQYsKnEGAMwN\nYQYsMnEGAMwFYQYsOnEGAHRPmAHLQJwBAF0TZsCyEGcAQLeEGbBMxBkA0CVhBiwbcQYAdEeYActI\nnAEAXRFmwLISZwBAN4QZsMzEGQDQBWEGLDtxBgDMnDADEGcAwIwJM4ARcQYAzIwwA/gycQYAzIQw\nA7g3cQYA7DhhBnBf4gwA2FHCDGAycQYA7BhhBrAxcQYA7AhhBnBo4gwAGJwwA7h/4gwAGJQwA9gc\ncQYADEaYAWyeOAMABiHMALZGnAEAUyfMALZOnAEAUyXMAA6POAMApkaYARw+cQYATIUwA9gecQYA\nbJswA9g+cQYAbIswA5gOcQYAHDZhBjA94gwAOCzCDGC6xBkAsGXCDGD6xBkAsCXCDGAYg8ZZVT2y\nqt5WVZ+vqi9U1Z9W1XGbPLZt8PFtQ84MAGxMmAEMZ9dQT1xVD07yviR3Jjk7SUvyyiTvr6pvaa3d\ntomneUOS16/bduU05wQANkeYAQxrsDhL8vwkJyXZ01q7Kkmq6uNJPpXkJ5Kct4nn+L+ttf3DjQgA\nbIYwAxjekC9rPCPJ/tUwS5LW2tVJ/ibJmQP+uQDAFAkzgJ0xZJydmuSyCdsvT3LKJp/jJ6vqzqq6\nvareV1X/anrjAQD3R5gB7JxqrQ3zxFV3JTmvtfbSddtfmeSlrbVDvqSyqt6U5F1J/l+S45P8h4yi\n7omttZUJ++9Lsi9Jdu/e/dgLL7xwGv8YLJADBw7koQ996KzHoEPWBpNYF8n1tx7MuZfekSOPqLz0\nXxydRzzETZ4Ta4PJrAsmefzjH/+R1tppm92/2zib8HxfkdGZuOtaa999qH337NnTrrjiiq2OzIJb\nWVnJ3r17Zz0GHbI2mGTZ14UzZhtb9rXBZNYFk1TVluJsyL8CuyXJwyZsP2b82Ja01m5N8u4k37HN\nuQCAQxBmALMxZJxdntF1Z+udkuQT23jeYU71AQDCDGCGhoyzdyZ5XFWdtLqhqk5I8l3jx7akqv5Z\nkqcluXRK8wEAawgzgNkaMs4uSHJNkndU1ZlVdUaSdyS5LmveWLqqjq+qe6rqF9dse0lVXVBVP1xV\ne6vq7Ixu
wf+IJC8fcGYAWErCDGD2BnsT6tbabVV1epLfTvKmJJXkL5O8qLV2YM2uleTI3DsUr0jy\ng+OPr0zyhYzi7Mdba86
"text/plain": [
"<matplotlib.figure.Figure at 0x7f87f13c0d10>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot(lambda x: max(0, x))"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Softplus"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"$$ g(x) = \\log(1 + e^{x}) $$"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* Wygładzona wersja ReLU."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"#### Softplus wykres"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAG9CAYAAACRcQ4FAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xl8XWWB//HPkz1N0jZt032nbbrR\nQkEWQakFBAVRQXAUHVfQ+SmKjuP+G0H56TAqowPqgCsuqIgLiw4oS9kXgbZAaUv3vU2XpNn35/dH\nUig1hbTN7TlJPu/X677uveeec/uF10OaL885zwkxRiRJkiRJycpKOoAkSZIkyXImSZIkSalgOZMk\nSZKkFLCcSZIkSVIKWM4kSZIkKQUsZ5IkSZKUAhkrZyGEsSGEa0MIj4YQ6kMIMYQwsZvHruvcf//H\n2zKVV5IkSZKSlJPB754CXAQ8BTwIvPEgj78LuGK/bSsOP5YkSZIkpU8my9kDMcYRACGED3Pw5Wxn\njPGxno8lSZIkSemTsdMaY4ztmfpuSZIkSepr0rwgyFs6r1VrCiE85vVmkiRJkvqytJaz24HLgLOA\ni4FG4I8hhPckmkqSJEmSMiTEGDP/h3Rcc/ZDYFKMcd0hHJ8NPAaMjDGOO8A+lwKXAhQUFBw3fvz4\nQw+sPqm9vZ2srLT+/wglybGhrjgudCBJjY09TZHKpkhuFgwfkEWuwzNV/Jmhrrzwwgs7Y4xl3d0/\nkwuC9JgYY1sI4XfA1SGEUTHGrV3scwNwA0B5eXlcscKFHfVyCxcuZP78+UnHUAo5NtQVx4UO5EiP\njT31LXz65sXcs7yCS+aM4j8umENxfq/4Fa5f8WeGuhJCWH8w+/fG/7IzP9UnSZKUAs9squL//Opp\ntlc3cuV5s/jnkycQQkg6lqQM6RXlLISQA7wT2BBj3JZ0HkmSpEyKMXLTExu48rbnGVacx80fOZlj\nx5cmHUtShmW0nIUQ3tH58rjO5zeFEHYAO2KM93fu0wrcGGP8UOf7dwFvBf4CbARGAB8D5gHvymRe\nSZKkpNU3t/KlPz7HHxdt5rRpZXznncdQWpSXdCxJR0CmZ85+t9/773c+3w/M73yd3fnYay0wHPgm\nMASoA54Ezo4x3pWxpJIkSQlbVVHLv/zyKVbtqOVfz5zGx94whawsT2OU+ouMlrMY46v+NNl/nxjj\nY8CCjIWSJElKoduWbOHzv3+GwtxsfvHBEzl16rCkI0k6wnrFNWeSJEl9VVNrG1//8zJufHQ9x08o\n5bp3z2PkoIKkY0lKgOVMkiQpIZsq6/nYTYtYsrGKS143ic+ePZ3cbO+VJfVXljNJkqQE3Leigk/9\ndjFtbZH/ec88zp49KulIkhJmOZMkSTqC2toj37n7Ba69dxUzRg3kBxfPY+KwoqRjSUoBy5kkSdIR\nUlHTyOW/Wcwjq3fxzuPHceVbZ1GQm/3qB0rqFyxnkiRJR8BDK3dy+W8XUdvUyjffMYcLjx+XdCRJ\nKWM5kyRJyqDWtna+e89KrrtvFVPKivn1JScxdURJ0rEkpZDlTJIkKUO2Vzdy2a8X8cTa3Vx0/Fiu\nPG82hXmexiipa5YzSZKkDLj/hR186reLaWhu45qL5nL+vLFJR5KUcpYzSZKkHtTa1s63//YCP1i4\nmvIRJXzv4nlMGV6cdCxJvYDlTJIkqYdsqWrgE79exJPrK3nXCeP4yltcjVFS91nOJEmSesC9y7fz\n6ZuX0NLaznf/6RjeesyYpCNJ6mUsZ5IkSYehpa2db961ghseWMOMUQP53ruPZXKZpzFKOniWM0mS\npEO0qbKey369iEUbqrj4xPH833NnehqjpENmOZMkSToEdz63lc/9/lna2iPXvftYzp0zOulIkno5\ny5kkSdJBaG6LfOmPz/Krxzdw9JhBXPuuY5k4
rCjpWJL6AMuZJElSN63YVsOVjzawuXYDl75+Mp95\nYzl5OVlJx5LUR1jOJEmSXkWMkV8+voGr7nie/KzIjR88gdOmlSUdS1IfYzmTJEl6BVX1zXzu989w\n19LtvH5aGReMqbOYScoI5+ElSZIO4PE1u3jTdx/k3uUVfOnNM/jZ+1/DoPyQdCxJfZQzZ5IkSftp\nbWvn2ntXce29Kxk/ZAC//5fXMmfs4KRjSerjLGeSJEn72FzVwKd+s5gn1u3m/GPH8NW3zaY431+Z\nJGWeP2kkSZI67b13WWtbO//1zrm8/dixSUeS1I9YziRJUr/X2NLG1+54nl89voE5Ywfx3//kvcsk\nHXmWM0mS1K8t3bKHT/5mMasqavnI6yfzr967TFJCLGeSJKlfam+P/PDBNXzrrysoHZDHLz50Aq+b\n6hL5kpJjOZMkSf3OlqoG/vXmJTy6ZhdnzxrJN84/mtKivKRjSernLGeSJKlfueOZLXzxD8/S2h75\nzwvmcOHxYwnBe5dJSp7lTJIk9Qs1jS185dal/GHRZo4ZN5jvvPMYF/2QlCqWM0mS1Oc9uW43n7p5\nMZsrG/jE6VO5bMEUcrNd9ENSuljOJElSn9XS1s6196zkuvtWMaa0kN999GSOmzAk6ViS1CXLmSRJ\n6pPW7azjk79dzJKNVbzjuLF85S0zKSnITTqWJB2Q5UySJPUpMUZ++/eNfPWO58nNzuL7F8/jzUeP\nSjqWJL0qy5kkSeozdtU28cU/PstdS7fz2qOG8u2L5jJqUGHSsSSpWyxnkiSpT/jr0m184Q/PUtPY\nypfePIMPnTqJrCyXyJfUe1jOJElSr1bd2MJXb3+eW57axKzRA7npkmMoH1mSdCxJOmiWM0mS1Gs9\nsmon/3bLM2yrbuSyBVO4bMFU8nJcIl9S72Q5kyRJvU5DcxtX37mcnz2yjsnDirjloydz7PjSpGNJ\n0mGxnEmSpF5l8cYqPn3zYtbsqOP9r53I586eTmFedtKxJOmwWc4kSVKv0NzaznX3ruR7C1czoiSf\nX334RE6ZMizpWJLUYyxnkiQp9V7YXsOnfruYpVuquWDeWL5y3kwGekNpSX2M5UySJKVWW3vkxw+t\n4Vt/fYGS/Bz+5z3HcfbskUnHkqSMsJxJkqRU2rCrns/8bglPrNvNmTNH8I3zj2ZYcX7SsSQpYyxn\nkiQpVdrbIz9/dB1X37mCnKzAty6cywXzxhCCN5SW1LdZziRJUmqs21nHZ3//DE+s3c1p08r4xvlH\nM3pwYdKxJOmIsJxJkqTEtbdHfvbIOv7zruXkZmXxn++Yw4XHjXW2TFK/YjmTJEmJWruzjs/esoS/\nr6vkDeVlfP38oxk1yNkySf2P5UySJCWirXO27Jt3LSc3O8tryyT1e5YzSZJ0xK3ZUctnb3mGJ9dX\nsmD6cL7+9qMZOagg6ViSlCjLmSRJOmLa2iM/fXgt37xrBfk5WXz7wrmc72yZJAGWM0mSdISs7pwt\ne2p9JadPH87Xzz+aEQOdLZOkvSxnkiQpo1ra2rn+/tX89z2rKMjN4pqL5vL2Y50tk6T9Wc4kSVLG\nPLOpis/e8gzLt9Xw5qNHcsV5sxhe4myZJHXFciZJknpcQ3Mb1/xtBT9+aC3DivO5/r3HcdaskUnH\nkqRUy8rUF4cQxoYQrg0hPBpCqA8hxBDCxG4emxVC+EIIYV0IoTGEsCSEcEGmskqSpJ7zyKqdnPWd\nB/jhg2t552vG8bdPn2Yxk6RuyFg5A6YAFwGVwIMHeezXgCuA64A3AY8BvwshvLknA0qSpJ6zp76F\nz93yDO/+0eNkBbjpkhP5xvlzGFSYm3Q0SeoVMnla4wMxxhEAIYQPA2/szkEhhOHAZ4D/iDF+q3Pz\nfSGEKcB/AH/JRFhJknTo7nxuK//31qXsrmvmI6dN5lNnTKMgNzvpWJLUq2SsnMUY2w/x0LOAPOCX\n+23/JfCT
EMKkGOPawwonSZJ6REV1I/9+61LuXLqNmaMG8tP3v4bZYwYlHUuSeqU0LggyC2gCVu23\nfWnn80zAciZJUoLa2yM
"text/plain": [
"<matplotlib.figure.Figure at 0x7f87f1348e50>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plot(lambda x: math.log(1 + math.exp(x)))"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Problem zanikającego gradientu (_vanishing gradient problem_)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* Sigmoidalne funkcje aktywacji ograniczają wartości na wyjściach neuronów do niewielkich przedziałów ($(-1, 1)$, $(0, 1)$ itp.).\n",
"* Jeżeli sieć ma wiele warstw, to podczas propagacji wstecznej mnożymy przez siebie wiele małych wartości → obliczony gradient jest mały.\n",
"* Im więcej warstw, tym silniejszy efekt zanikania."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"#### Sposoby na zanikający gradient\n",
"\n",
"* Modyfikacja algorytmu optymalizacji (_RProp_, _RMSProp_)\n",
    "* Użycie innej funkcji aktywacji (ReLU, softplus)\n",
"* Dodanie warstw _dropout_\n",
"* Nowe architektury (LSTM itp.)\n",
"* Więcej danych, zwiększenie mocy obliczeniowej"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"## 5.7. Wielowarstwowe sieci neuronowe w&nbsp;praktyce"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"### Przykład: MNIST\n",
"\n",
"_Modified National Institute of Standards and Technology database_"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "fragment"
}
},
"source": [
"* Zbiór cyfr zapisanych pismem odręcznym\n",
"* 60 000 przykładów uczących, 10 000 przykładów testowych\n",
"* Rozdzielczość każdego przykładu: 28 × 28 = 784 piksele"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [],
"source": [
    "# źródło: https://github.com/keras-team/keras/examples/mnist_mlp.py\n",
"\n",
"import keras\n",
"from keras.datasets import mnist\n",
"\n",
"# załaduj dane i podziel je na zbiory uczący i testowy\n",
"(x_train, y_train), (x_test, y_test) = mnist.load_data()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [],
"source": [
"def draw_examples(examples, captions=None):\n",
" plt.figure(figsize=(16, 4))\n",
" m = len(examples)\n",
" for i, example in enumerate(examples):\n",
" plt.subplot(100 + m * 10 + i + 1)\n",
" plt.imshow(example, cmap=plt.get_cmap('gray'))\n",
" plt.show()\n",
" if captions is not None:\n",
" print(6 * ' ' + (10 * ' ').join(str(captions[i]) for i in range(m)))"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA6oAAACVCAYAAABRuAf7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XmUVNXd7vFno4ggICLEGA2Coigy\nKeAUl6DiFBFBFEMYlEThakTNG7kkhhgcEMVhvYDixBUjsgJGZDIaRUV4nVig0RtQDBoEUSIoMxi5\n6r5/dFd3/bbd1VXdp6pOVX0/a/XqeurUsLt4qNOnq3Zt570XAAAAAABxUS/fAwAAAAAAIBkHqgAA\nAACAWOFAFQAAAAAQKxyoAgAAAABihQNVAAAAAECscKAKAAAAAIiVSA9UnXM/ds495Zzb5pzb7px7\n2jnXKsr7QGGgC0igC5DoASrRBSTQBUj0ANVzUa2j6pxrJOldSV9LGiPJS7pNUiNJnbz3uyK5I8Qe\nXUACXYBED1CJLiCBLkCiB0ht7whv60pJh0tq573/UJKcc/9X0mpJIyTdG+F9Id7oAhLoAiR6gEp0\nAQl0ARI9QApRvqL6kqR9vfc/Cc5fLEne+x6R3BFijy4ggS5AogeoRBeQQBcg0QOkFuUrqsdKmlfF\n+SslXZLODTjnojlqRs54710VZ9epC/SgIH3hvW9Zxfl0ofRU1QX2DyWI/QPKsX9AQuT7B3pQkKp7\nTvieKD9MqbmkLVWcv1nSARHeD+KPLpSetdWcTxdKT1VdoAdIoAulh/0DEtg/QKr+OeF7onxFtVac\nc8MlDc/3OJBf9AAJdAEJdAESPUAlugCJHpSSKA9Ut6jqv3xU95cSSZL3/mFJD0u8fF9EMu4CPSha\ndAES+wdU4jkBCXQBEj1AClG+9Xelyt5nHmov6b0I7wfxRxeQQBcg0QNUogtIoAuQ6AFSiPJAdb6k\nk5xzhyfOcM61lvST8m0oHXQBCXQBEj1AJbqABLoAiR4ghSiXp9lPZQv2fqXKBXtvldREZQv27kzj\nNnj5vsBU9amOde0CPShIb3nvu4Vn0oWS9L0usH8oTewfUI79AxIi3z/Qg4JU5XNCVSJ7RdV7v0vS\nGZL+KWm6pBmS1kg6I51fQlA86AIS6AIkeoBKdAEJdAESPUBqkb2iGgX+KlJ4qlknr07oQUFK+69j\nmaALBYkuQBL7B1TgOQEJkXeBHhSk3L+iCgAAAABAFDhQBQAAAADECgeqAAAAAIBY4UAVAAAAABAr\nHKgCAAAAAGKFA1UAAAAAQKxwoAoAAAAAiJW98z0AoFR17drV5GuuucbkoUOHmvz444+bPHnyZJPf\nfvvtCEcHAACAbJk4caLJ1157bcXpFStWmG29e/c2ee3atdkbWIzwiioAAAAAIFY4UAUAAAAAxApv\n/Y3IXnvtZfL++++f9nXDt3w2atTI5Hbt2pn8q1/9yuS7777b5IEDB5r8n//8x+Q77rij4vTNN9+c\n9jhRN126dDF54cKFJjdt2tRk773JQ4YMMblPnz4mH3jggXUdIorEmWeeafKMGTNM7tGjh8kffPBB\n1seE7BgzZozJ4XN6vXr279E9e/Y0efHixVkZF4BoNGnSxOTGjRubfP7555vcsmVLk++9916Tv/76\n6whHh0y0bt3a5MGDB5v83XffVZw+5phjzLajjz7aZN76CwAAAABAHnCgCgAAAACIFQ5UAQAAAACx\nwhzVcq1atTJ5n332MfmUU04x+dRTTzW5WbNmJvfv3z+ysa1fv97kSZMmmdyvXz+Td+zYYfK7775r\nMnOScueEE06oOD179myzLZzHHM5JDf8d9+zZY3I4J/Wkk04yOVyuJrx+KTjttNMqToeP15w5c3I9\nnJzp3r27ycuWLcvTSBC1yy+/3OTRo0ebnDzH
qSrh8wyA/Eueuxj+nz755JNN7tChQ0a3ffDBB5uc\nvAQKcmvTpk0mL1myxOTws0fAK6oAAAAAgJjhQBUAAAAAECscqAIAAAAAYqVk56iGa1q+/PLLJmey\nDmrUwjlG4Tp5O3fuNDlcI3HDhg0mb9myxWTWTIxOuObt8ccfb/ITTzxRcTqcJ1KT1atXmzxhwgST\nZ86cafJrr71mctib8ePHZ3T/xSB5zcgjjzzSbCumOarhWplt2rQx+bDDDjPZOZf1MSE7wn/Lfffd\nN08jQaZOPPFEk5PXUAzXNj722GNT3tYNN9xg8meffWZy+DkayfsiSVq6dGnqwSJS4RqY119/vcmD\nBg2qON2wYUOzLXy+/uSTT0wOP88iXH9zwIABJk+ZMsXkVatWVTdsRGzXrl0ml8paqHXBK6oAAAAA\ngFjhQBUAAAAAECscqAIAAAAAYqVk56iuW7fO5C+//NLkKOeohnNBtm7davLpp59ucrje5fTp0yMb\nC6L10EMPmTxw4MDIbjuc79q4cWOTw/Vwk+djSlKnTp0iG0uhGjp0aMXpN954I48jya5w/vOVV15p\ncjg/jTlJhaNXr14mjxw5MuXlw3/b3r17m/z5559HMzDU6NJLLzV54sSJJrdo0aLidDgP8ZVXXjG5\nZcuWJt91110p7zu8vfD6P/vZz1JeH5kJf2e88847TQ670KRJk7RvO/y8inPOOcfk+vXrmxw+ByT3\nrKqM3GnWrJnJnTt3ztNICgevqAIAAAAAYoUDVQAAAABArHCgCgAAAACIlbTmqDrnDpU0WlI3SZ0l\nNZTUxnv/cXC5fSXdKmmwpGaS3pE02nu/JMIxR2Lz5s0mjxo1yuRwXs/f//53kydNmpTy9t95552K\n02eddZbZFq6jFK6Xdt1116W87ThxznkVeBcy0bVrV5PPP/98k1OtTxnOKV2wYIHJd999t8nhunhh\nB8P1cc8444y0x5IFXZ1zrePWg3B90WI1derUlNvDOU5ZFssuFIpw/ctp06aZXNPnJ4RzF/O5Tl+x\n7x/23tv+CtWtWzeTH3nkEZPDdbeXLKn8cW+99Vaz7dVXXzW5QYMGJj/55JMmn3322SnHunz58pTb\ns6zonxP69etn8hVXXFHr2/roo49MDn+HDNdRbdu2ba3vKw+KvguphM8BrVq1Svu63bt3Nzmci1ys\na7Km+1tcW0kDJG2R9D8pLvd/JF0p6SZJvSVtkPS8c65LXQaJgkQXINEDVKILSKALkOgBKtEFVCnd\nT/1d4r0/SJKcc1dI+t6f7pxznSX9XNIvvPfTys9bLGmlpFsk9YlkxIg9ugCJHqASXUACXYBED1CJ\nLiCVtF5R9d5/l8bF+kj6f5JmJV3vG0kzJZ3jnGtQ3RVRdOgCJHqASnQBCXQBEj1AJbqAakW5juqx\nktZ473cH56+UtI/K3j68MsL7i9TcuXNNfvnll03esWOHyeHaR7/85S9NTp5vGM5JDa1caR+W4cOH\npx5s/BV0F5J16WLfdbJw4UKTmzZtarL33uTnnnuu4nS4xmqPHj1MHjNmjMnhvMNNmzaZ/O6775r8\n3Xf270nh/NlwXda3335bWZbzHoRrxx500EFR3nxs1TRvMextHhTNc0K2XXbZZSb/6Ec/Snn5cL3N\nxx9/POohRa1oujB48GCTa5orHv4/TF5bc/v27SmvG67DWdOc1PXr15v8pz/9KeXl86BoeiBJl1xy\nSUaX//jjj01etmxZxenRo0ebbeGc1NAxxxyT0X3HUFF1IZXws0cee+wxk8eOHVvtdcNtW7duNfm+\n++6ry9BiK8pPGmmusjmsoc1J21Ea6AIkeoBKdAEJdAESPUAluoBqRfmKaq0454ZLKviXEFE39AAJ\ndAEJdAESPUAlugCJHpSSKF9R3SLpgCrOT/wlZHMV2+S9f9h73817362q7ShIGXeBHhQlnhOQQBeQ\nwP4BEs8J
qMRzAqoV5SuqKyX1c841Ct5n3l7SHkkfRnhfWVfTfJFt27al3H7llVdWnJ41a5bZFs4l\nLEIF24WjjjrK5HB93XA
"text/plain": [
"<matplotlib.figure.Figure at 0x7f87f132bed0>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" 5 0 4 1 9 2 1\n"
]
}
],
"source": [
"draw_examples(x_train[:7], captions=y_train)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"60000 przykładów uczących\n",
"10000 przykładów testowych\n"
]
}
],
"source": [
"num_classes = 10\n",
"\n",
"x_train = x_train.reshape(60000, 784) # 784 = 28 * 28\n",
"x_test = x_test.reshape(10000, 784)\n",
"x_train = x_train.astype('float32')\n",
"x_test = x_test.astype('float32')\n",
"x_train /= 255\n",
"x_test /= 255\n",
"print('{} przykładów uczących'.format(x_train.shape[0]))\n",
"print('{} przykładów testowych'.format(x_test.shape[0]))\n",
"\n",
"# przekonwertuj wektory klas na binarne macierze klas\n",
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
"y_test = keras.utils.to_categorical(y_test, num_classes)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"dense_4 (Dense) (None, 512) 401920 \n",
"_________________________________________________________________\n",
"dropout_1 (Dropout) (None, 512) 0 \n",
"_________________________________________________________________\n",
"dense_5 (Dense) (None, 512) 262656 \n",
"_________________________________________________________________\n",
"dropout_2 (Dropout) (None, 512) 0 \n",
"_________________________________________________________________\n",
"dense_6 (Dense) (None, 10) 5130 \n",
"=================================================================\n",
"Total params: 669,706\n",
"Trainable params: 669,706\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"model = Sequential()\n",
"model.add(Dense(512, activation='relu', input_shape=(784,)))\n",
"model.add(Dropout(0.2))\n",
"model.add(Dense(512, activation='relu'))\n",
"model.add(Dropout(0.2))\n",
"model.add(Dense(num_classes, activation='softmax'))\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"((60000, 784), (60000, 10))\n"
]
}
],
"source": [
"print(x_train.shape, y_train.shape)"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 60000 samples, validate on 10000 samples\n",
"Epoch 1/5\n",
"60000/60000 [==============================] - 14s - loss: 0.2457 - acc: 0.9244 - val_loss: 0.1250 - val_acc: 0.9605\n",
"Epoch 2/5\n",
"60000/60000 [==============================] - 14s - loss: 0.1021 - acc: 0.9691 - val_loss: 0.0859 - val_acc: 0.9748\n",
"Epoch 3/5\n",
"60000/60000 [==============================] - 14s - loss: 0.0746 - acc: 0.9773 - val_loss: 0.0884 - val_acc: 0.9744\n",
"Epoch 4/5\n",
"60000/60000 [==============================] - 13s - loss: 0.0619 - acc: 0.9815 - val_loss: 0.0893 - val_acc: 0.9754\n",
"Epoch 5/5\n",
"60000/60000 [==============================] - 15s - loss: 0.0507 - acc: 0.9849 - val_loss: 0.0845 - val_acc: 0.9771\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7f87e996a2d0>"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.compile(loss='categorical_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])\n",
"\n",
"model.fit(x_train, y_train, batch_size=128, epochs=5, verbose=1,\n",
" validation_data=(x_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test loss: 0.0845102945257\n",
"Test accuracy: 0.9771\n"
]
}
],
"source": [
"score = model.evaluate(x_test, y_test, verbose=0)\n",
"\n",
"print('Test loss: {}'.format(score[0]))\n",
"print('Test accuracy: {}'.format(score[1]))"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
"Warstwa _dropout_ to metoda regularyzacji, służy zapobieganiu nadmiernemu dopasowaniu sieci. Polega na tym, że część węzłów sieci jest usuwana w sposób losowy."
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"dense_7 (Dense) (None, 512) 401920 \n",
"_________________________________________________________________\n",
"dense_8 (Dense) (None, 512) 262656 \n",
"_________________________________________________________________\n",
"dense_9 (Dense) (None, 10) 5130 \n",
"=================================================================\n",
"Total params: 669,706\n",
"Trainable params: 669,706\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"Train on 60000 samples, validate on 10000 samples\n",
"Epoch 1/5\n",
"60000/60000 [==============================] - 11s - loss: 0.2214 - acc: 0.9314 - val_loss: 0.1048 - val_acc: 0.9668\n",
"Epoch 2/5\n",
"60000/60000 [==============================] - 12s - loss: 0.0838 - acc: 0.9739 - val_loss: 0.0842 - val_acc: 0.9752\n",
"Epoch 3/5\n",
"60000/60000 [==============================] - 10s - loss: 0.0548 - acc: 0.9829 - val_loss: 0.0806 - val_acc: 0.9773\n",
"Epoch 4/5\n",
"60000/60000 [==============================] - 9s - loss: 0.0387 - acc: 0.9878 - val_loss: 0.0713 - val_acc: 0.9804\n",
"Epoch 5/5\n",
"60000/60000 [==============================] - 9s - loss: 0.0297 - acc: 0.9911 - val_loss: 0.0847 - val_acc: 0.9787\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7f87e82d1350>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bez warstw Dropout\n",
"\n",
"num_classes = 10\n",
"\n",
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"\n",
"x_train = x_train.reshape(60000, 784) # 784 = 28 * 28\n",
"x_test = x_test.reshape(10000, 784)\n",
"x_train = x_train.astype('float32')\n",
"x_test = x_test.astype('float32')\n",
"x_train /= 255\n",
"x_test /= 255\n",
"\n",
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
"y_test = keras.utils.to_categorical(y_test, num_classes)\n",
"\n",
"model_no_dropout = Sequential()\n",
"model_no_dropout.add(Dense(512, activation='relu', input_shape=(784,)))\n",
"model_no_dropout.add(Dense(512, activation='relu'))\n",
"model_no_dropout.add(Dense(num_classes, activation='softmax'))\n",
"model_no_dropout.summary()\n",
"\n",
"model_no_dropout.compile(loss='categorical_crossentropy',\n",
" optimizer=RMSprop(),\n",
" metrics=['accuracy'])\n",
"\n",
"model_no_dropout.fit(x_train, y_train,\n",
" batch_size=128,\n",
" epochs=5,\n",
" verbose=1,\n",
" validation_data=(x_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test loss (no dropout): 0.0846566448619\n",
"Test accuracy (no dropout): 0.9787\n"
]
}
],
"source": [
"# Bez warstw Dropout\n",
"\n",
"score = model_no_dropout.evaluate(x_test, y_test, verbose=0)\n",
"\n",
"print('Test loss (no dropout): {}'.format(score[0]))\n",
"print('Test accuracy (no dropout): {}'.format(score[1]))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"slideshow": {
"slide_type": "notes"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"dense_10 (Dense) (None, 2500) 1962500 \n",
"_________________________________________________________________\n",
"dense_11 (Dense) (None, 2000) 5002000 \n",
"_________________________________________________________________\n",
"dense_12 (Dense) (None, 1500) 3001500 \n",
"_________________________________________________________________\n",
"dense_13 (Dense) (None, 1000) 1501000 \n",
"_________________________________________________________________\n",
"dense_14 (Dense) (None, 500) 500500 \n",
"_________________________________________________________________\n",
"dense_15 (Dense) (None, 10) 5010 \n",
"=================================================================\n",
"Total params: 11,972,510\n",
"Trainable params: 11,972,510\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"Train on 60000 samples, validate on 10000 samples\n",
"Epoch 1/10\n",
"60000/60000 [==============================] - 212s - loss: 0.7388 - acc: 0.7954 - val_loss: 0.2908 - val_acc: 0.9172\n",
"Epoch 2/10\n",
"60000/60000 [==============================] - 191s - loss: 0.2390 - acc: 0.9305 - val_loss: 0.1833 - val_acc: 0.9470\n",
"Epoch 3/10\n",
"60000/60000 [==============================] - 166s - loss: 0.1688 - acc: 0.9517 - val_loss: 0.1555 - val_acc: 0.9549\n",
"Epoch 4/10\n",
"60000/60000 [==============================] - 166s - loss: 0.1344 - acc: 0.9614 - val_loss: 0.1274 - val_acc: 0.9621\n",
"Epoch 5/10\n",
"60000/60000 [==============================] - 166s - loss: 0.1074 - acc: 0.9683 - val_loss: 0.1213 - val_acc: 0.9661\n",
"Epoch 6/10\n",
"60000/60000 [==============================] - 440s - loss: 0.0924 - acc: 0.9725 - val_loss: 0.1066 - val_acc: 0.9709\n",
"Epoch 7/10\n",
"60000/60000 [==============================] - 169s - loss: 0.0768 - acc: 0.9773 - val_loss: 0.1777 - val_acc: 0.9517\n",
"Epoch 8/10\n",
"60000/60000 [==============================] - 183s - loss: 0.0657 - acc: 0.9805 - val_loss: 0.1053 - val_acc: 0.9711\n",
"Epoch 9/10\n",
"60000/60000 [==============================] - 170s - loss: 0.0572 - acc: 0.9832 - val_loss: 0.1044 - val_acc: 0.9717\n",
"Epoch 10/10\n",
"60000/60000 [==============================] - 166s - loss: 0.0493 - acc: 0.9851 - val_loss: 0.0938 - val_acc: 0.9752\n"
]
},
{
"data": {
"text/plain": [
"<keras.callbacks.History at 0x7f87f007f610>"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Więcej warstw, inna funkcja aktywacji\n",
"\n",
"num_classes = 10\n",
"\n",
"(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
"\n",
"x_train = x_train.reshape(60000, 784) # 784 = 28 * 28\n",
"x_test = x_test.reshape(10000, 784)\n",
"x_train = x_train.astype('float32')\n",
"x_test = x_test.astype('float32')\n",
"x_train /= 255\n",
"x_test /= 255\n",
"\n",
"y_train = keras.utils.to_categorical(y_train, num_classes)\n",
"y_test = keras.utils.to_categorical(y_test, num_classes)\n",
"\n",
"model3 = Sequential()\n",
"model3.add(Dense(2500, activation='tanh', input_shape=(784,)))\n",
"model3.add(Dense(2000, activation='tanh'))\n",
"model3.add(Dense(1500, activation='tanh'))\n",
"model3.add(Dense(1000, activation='tanh'))\n",
"model3.add(Dense(500, activation='tanh'))\n",
"model3.add(Dense(num_classes, activation='softmax'))\n",
"model3.summary()\n",
"\n",
"model3.compile(loss='categorical_crossentropy',\n",
" optimizer=RMSprop(),\n",
" metrics=['accuracy'])\n",
"\n",
"model3.fit(x_train, y_train,\n",
" batch_size=128,\n",
" epochs=10,\n",
" verbose=1,\n",
" validation_data=(x_test, y_test))"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Test loss: 0.0937788957049\n",
"Test accuracy: 0.9752\n"
]
}
],
"source": [
"# Więcej warstw, inna funkcja aktywacji\n",
"\n",
"score = model3.evaluate(x_test, y_test, verbose=0)\n",
"\n",
"print('Test loss: {}'.format(score[0]))\n",
"print('Test accuracy: {}'.format(score[1]))"
]
}
],
"metadata": {
"celltoolbar": "Slideshow",
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.15rc1"
},
"livereveal": {
"start_slideshow_at": "selected",
"theme": "amu"
}
},
"nbformat": 4,
"nbformat_minor": 2
}