diff --git a/wyk/10_Propagacja_wsteczna.ipynb b/wyk/10_Propagacja_wsteczna.ipynb
index 879468c..0a9945d 100644
--- a/wyk/10_Propagacja_wsteczna.ipynb
+++ b/wyk/10_Propagacja_wsteczna.ipynb
@@ -266,7 +266,7 @@
     }
    },
    "source": [
-    "$$ f(x_1, x_2) = \\max(x_1 + x_2) \\hskip{12em} \\\\\n",
+    "$$ f(x_1, x_2) = \\max(x_1, x_2) \\hskip{12em} \\\\\n",
     "\\to \\qquad \\frac{\\partial f}{\\partial x_1} = \\mathbb{1}_{x_1 \\geq x_2}, \\quad \\frac{\\partial f}{\\partial x_2} = \\mathbb{1}_{x_2 \\geq x_1}, \\quad \\nabla f = (\\mathbb{1}_{x_1 \\geq x_2}, \\mathbb{1}_{x_2 \\geq x_1}) $$ "
    ]
   },
@@ -755,7 +755,7 @@
    "\n",
    "A single iteration:\n",
    "* For the parameters $\\Theta = (\\Theta^{(1)},\\ldots,\\Theta^{(L)})$, create auxiliary zero matrices $\\Delta = (\\Delta^{(1)},\\ldots,\\Delta^{(L)})$ of the same dimensions (for simplicity, the weights $\\beta$ have been omitted).\n",
-    "* For the $m$ examples in the _batch_, $i = 1,\\ldots,m$:\n",
+    "* For the $m$ examples in the *batch*, $i = 1,\\ldots,m$:\n",
     "  * Run the backpropagation algorithm for the example $(x^{(i)}, y^{(i)})$ and store the gradients $\\nabla_{\\Theta}J^{(i)}(\\Theta)$ for this example;\n",
     "  * $\\Delta := \\Delta + \\dfrac{1}{m}\\nabla_{\\Theta}J^{(i)}(\\Theta)$\n",
     "* Perform the weight update: $\\Theta := \\Theta - \\alpha \\Delta$"
@@ -969,7 +969,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 5,
    "metadata": {
     "scrolled": true,
     "slideshow": {
@@ -981,19 +981,15 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "Model: \"sequential_1\"\n",
+     "Model: \"sequential\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
-     "dense_3 (Dense)              (None, 512)               401920    \n",
+     "dense (Dense)                (None, 512)               401920    \n",
      "_________________________________________________________________\n",
-     "dropout (Dropout)            (None, 512)               0         \n",
+     "dense_1 (Dense)              (None, 512)               262656    \n",
      "_________________________________________________________________\n",
-     "dense_4 (Dense)              (None, 512)               262656    \n",
-     "_________________________________________________________________\n",
-     "dropout_1 (Dropout)          (None, 512)               0         \n",
-     "_________________________________________________________________\n",
-     "dense_5 (Dense)              (None, 10)                5130      \n",
+     "dense_2 (Dense)              (None, 10)                5130      \n",
      "=================================================================\n",
      "Total params: 669,706\n",
      "Trainable params: 669,706\n",
@@ -1004,10 +1000,8 @@
    ],
    "source": [
     "model = keras.Sequential()\n",
-    "model.add(Dense(512, activation='relu', input_shape=(784,)))\n",
-    "model.add(Dropout(0.2))\n",
-    "model.add(Dense(512, activation='relu'))\n",
-    "model.add(Dropout(0.2))\n",
+    "model.add(Dense(512, activation='tanh', input_shape=(784,)))\n",
+    "model.add(Dense(512, activation='tanh'))\n",
     "model.add(Dense(num_classes, activation='softmax'))\n",
     "\n",
     "model.summary() # display a summary of the network architecture"
@@ -1015,7 +1009,7 @@
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 6,
    "metadata": {
     "slideshow": {
      "slide_type": "subslide"
     }
@@ -1036,55 +1030,28 @@
   {
    "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "slideshow": {
-     "slide_type": "subslide"
-    }
-   },
+   "execution_count": 7,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Epoch 1/10\n",
-      "469/469 [==============================] - 20s 42ms/step - loss: 0.0957 - accuracy: 0.9708 - val_loss: 0.0824 - val_accuracy: 0.9758\n",
"Epoch 2/10\n", - "469/469 [==============================] - 20s 43ms/step - loss: 0.0693 - accuracy: 0.9793 - val_loss: 0.0807 - val_accuracy: 0.9772\n", - "Epoch 3/10\n", - "469/469 [==============================] - 18s 38ms/step - loss: 0.0563 - accuracy: 0.9827 - val_loss: 0.0861 - val_accuracy: 0.9758\n", - "Epoch 4/10\n", - "469/469 [==============================] - 18s 37ms/step - loss: 0.0485 - accuracy: 0.9857 - val_loss: 0.0829 - val_accuracy: 0.9794\n", - "Epoch 5/10\n", - "469/469 [==============================] - 19s 41ms/step - loss: 0.0428 - accuracy: 0.9876 - val_loss: 0.0955 - val_accuracy: 0.9766\n", - "Epoch 6/10\n", - "469/469 [==============================] - 22s 47ms/step - loss: 0.0377 - accuracy: 0.9887 - val_loss: 0.0809 - val_accuracy: 0.9794\n", - "Epoch 7/10\n", - "469/469 [==============================] - 17s 35ms/step - loss: 0.0338 - accuracy: 0.9904 - val_loss: 0.1028 - val_accuracy: 0.9788\n", - "Epoch 8/10\n", - "469/469 [==============================] - 17s 36ms/step - loss: 0.0322 - accuracy: 0.9911 - val_loss: 0.0937 - val_accuracy: 0.9815\n", - "Epoch 9/10\n", - "469/469 [==============================] - 18s 37ms/step - loss: 0.0303 - accuracy: 0.9912 - val_loss: 0.0916 - val_accuracy: 0.9829.0304 - accu\n", - "Epoch 10/10\n", - "469/469 [==============================] - 16s 34ms/step - loss: 0.0263 - accuracy: 0.9926 - val_loss: 0.0958 - val_accuracy: 0.9812\n" + "[[0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]\n", + " [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]\n", + " [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]\n", + " [0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]\n", + " [0. 0. 0. 0. 1. 0. 0. 0. 0. 
      ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       ""
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
    ],
    "source": [
-    "model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.RMSprop(), metrics=['accuracy'])\n",
-    "\n",
-    "model.fit(x_train, y_train, batch_size=128, epochs=10, verbose=1,\n",
-    "          validation_data=(x_test, y_test))"
+    "print(y_train[:10])"
    ]
   },
   {
@@ -1100,8 +1067,61 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-     "Test loss: 0.0757974311709404\n",
-     "Test accuracy: 0.9810000061988831\n"
+     "Epoch 1/10\n",
+     "469/469 [==============================] - 11s 24ms/step - loss: 0.2807 - accuracy: 0.9158 - val_loss: 0.1509 - val_accuracy: 0.9550\n",
+     "Epoch 2/10\n",
+     "469/469 [==============================] - 11s 24ms/step - loss: 0.1242 - accuracy: 0.9619 - val_loss: 0.1076 - val_accuracy: 0.9677\n",
+     "Epoch 3/10\n",
+     "469/469 [==============================] - 11s 24ms/step - loss: 0.0812 - accuracy: 0.9752 - val_loss: 0.0862 - val_accuracy: 0.9723\n",
+     "Epoch 4/10\n",
+     "469/469 [==============================] - 11s 24ms/step - loss: 0.0587 - accuracy: 0.9820 - val_loss: 0.0823 - val_accuracy: 0.9727\n",
+     "Epoch 5/10\n",
+     "469/469 [==============================] - 11s 24ms/step - loss: 0.0416 - accuracy: 0.9870 - val_loss: 0.0735 - val_accuracy: 0.9763\n",
+     "Epoch 6/10\n",
+     "469/469 [==============================] - 11s 24ms/step - loss: 0.0318 - accuracy: 0.9897 - val_loss: 0.0723 - val_accuracy: 0.9761\n",
+     "Epoch 7/10\n",
+     "469/469 [==============================] - 11s 23ms/step - loss: 0.0215 - accuracy: 0.9940 - val_loss: 0.0685 - val_accuracy: 0.9792\n",
+     "Epoch 8/10\n",
+     "469/469 [==============================] - 11s 23ms/step - loss: 0.0189 - accuracy: 0.9943 - val_loss: 0.0705 - val_accuracy: 0.9786\n",
+     "Epoch 9/10\n",
+     "469/469 [==============================] - 11s 24ms/step - loss: 0.0148 - accuracy: 0.9957 - val_loss: 0.0674 - val_accuracy: 0.9790\n",
+     "Epoch 10/10\n",
+     "469/469 [==============================] - 11s 23ms/step - loss: 0.0092 - accuracy: 0.9978 - val_loss: 0.0706 - val_accuracy: 0.9798\n"
     ]
    },
+   {
+    "data": {
+     "text/plain": [
+      ""
+     ]
+    },
+    "execution_count": 8,
+    "metadata": {},
+    "output_type": "execute_result"
+   }
+  ],
+  "source": [
+   "model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(), metrics=['accuracy'])\n",
+   "\n",
+   "model.fit(x_train, y_train, batch_size=128, epochs=10, verbose=1,\n",
+   "          validation_data=(x_test, y_test))"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 9,
+  "metadata": {
+   "slideshow": {
+    "slide_type": "subslide"
+   }
+  },
+  "outputs": [
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "Test loss: 0.07055816799402237\n",
+     "Test accuracy: 0.9797999858856201\n"
     ]
    }
   ],
diff --git a/wyk/11_Wielowarstwowe_sieci_neuronowe.ipynb b/wyk/11_Wielowarstwowe_sieci_neuronowe.ipynb
index 54776ad..4830abc 100644
--- a/wyk/11_Wielowarstwowe_sieci_neuronowe.ipynb
+++ b/wyk/11_Wielowarstwowe_sieci_neuronowe.ipynb
@@ -31,6 +31,8 @@
    }
   },
   "source": [
+   "* A composition of linear functions is itself a linear function.\n",
+   "* The main task of an activation function is to introduce nonlinearity into the neural network, so that the model can capture not only linear relationships in the data.\n",
    "* Every activation function has its advantages and disadvantages.\n",
    "* Different types of activation functions are suitable for different applications."
   ]
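A side note on the mini-batch procedure in the edited cell of `10_Propagacja_wsteczna.ipynb`: the update rule maps almost line for line onto NumPy. Below is a minimal sketch under the cell's notation; `theta` and `example_grads` are hypothetical names standing in for the parameters $\Theta$ and the per-example backpropagation gradients $\nabla_{\Theta}J^{(i)}(\Theta)$, which the notebook computes elsewhere.

```python
import numpy as np

def minibatch_update(theta, example_grads, alpha=0.1):
    """One iteration: Delta := Delta + (1/m) * grad_i over the batch,
    then Theta := Theta - alpha * Delta (weights beta omitted, as in the text)."""
    m = len(example_grads)
    # Auxiliary zero matrices Delta with the same shapes as Theta
    delta = [np.zeros_like(w) for w in theta]
    for grads in example_grads:      # grads: backprop gradients for one example
        for l in range(len(theta)):  # accumulate the averaged gradient
            delta[l] += grads[l] / m
    # Weight update: Theta := Theta - alpha * Delta
    return [w - alpha * d for w, d in zip(theta, delta)]
```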
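For anyone who wants to re-run the edited cells end to end, the pieces of this diff assemble as below. This is only a sketch: the MNIST loading and preprocessing come from earlier notebook cells that are not part of the diff, so the lines marked as assumed may differ from the notebook's exact code, and losses and accuracies will vary between runs.

```python
import keras
from keras.layers import Dense
from keras.utils import to_categorical

num_classes = 10

# Assumed preprocessing (from earlier cells not shown in this diff):
# flatten 28x28 images to 784-dim float vectors in [0, 1], one-hot encode labels
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

# Architecture after this diff: tanh activations, dropout layers removed
model = keras.Sequential()
model.add(Dense(512, activation='tanh', input_shape=(784,)))
model.add(Dense(512, activation='tanh'))
model.add(Dense(num_classes, activation='softmax'))

# Optimizer after this diff: Adam instead of RMSprop
model.compile(loss='categorical_crossentropy',
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=128, epochs=10, verbose=1,
          validation_data=(x_test, y_test))

# Final evaluation, as in the appended cell
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
```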
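The first bullet added to `11_Wielowarstwowe_sieci_neuronowe.ipynb` (a composition of linear functions is itself linear, hence the need for nonlinear activations) can be checked numerically in a few lines; the weight shapes here are arbitrary illustrations:

```python
import numpy as np

rng = np.random.default_rng(0)
W1 = rng.normal(size=(4, 3))   # first "layer" weights
W2 = rng.normal(size=(2, 4))   # second "layer" weights
x = rng.normal(size=3)

# Two stacked layers with no activation function...
two_layers = W2 @ (W1 @ x)
# ...collapse to a single linear map with weights W2 @ W1
one_layer = (W2 @ W1) @ x

print(np.allclose(two_layers, one_layer))  # True
```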