"""Univariate linear regression fitted with batch gradient descent.

Covers the first two notebook cells: helper definitions, loading of the
training data and a single training run rendered as a LaTeX formula.
"""
import math

import numpy as np
import pandas as pd


def h(theta, x):
    """Evaluate the linear hypothesis ``theta[0] + theta[1] * x``."""
    return theta[0] + theta[1] * x


def J(h, theta, x, y):
    """Return the halved mean squared error of ``h`` over the samples.

    Args:
        h: Hypothesis callable, invoked as ``h(theta, x_i)``.
        theta: Parameters forwarded unchanged to ``h``.
        x: Sequence of inputs, expected to be as long as ``y``.
        y: Sequence of targets; its length is the sample count ``m``.

    Returns:
        ``1 / (2 * m)`` times the sum of squared prediction errors.

    Raises:
        ZeroDivisionError: If ``y`` is empty.
    """
    m = len(y)
    return 1.0 / (2 * m) * sum((h(theta, xi) - yi) ** 2 for xi, yi in zip(x, y))


def LatexMatrix(matrix):
    r"""Format a two-dimensional matrix as a LaTeX ``array`` environment.

    Every entry is printed with four decimal places and columns are
    right-aligned.

    Args:
        matrix: Two-dimensional ``numpy.ndarray``, ``numpy.matrix`` or
            anything ``numpy.asarray`` converts to a 2-D array.

    Returns:
        A string of the form
        ``\left[\begin{array}{r...} ... \end{array}\right]``.

    Raises:
        ValueError: If the converted array is not two-dimensional.
    """
    # Work on a plain ndarray: iterating a row of ``numpy.matrix`` yields the
    # row itself (still 2-D) instead of its scalars, which made the previous
    # per-element ``.item()`` call fail for matrices wider than one column.
    cells = np.asarray(matrix)
    m, n = cells.shape
    header = r"\left[\begin{array}" + "{" + ("r" * n) + "}"
    rows = "".join(
        r" & ".join("%.4f" % value for value in row) + r" \\ " for row in cells
    )
    return header + rows + r"\end{array}\right]"


def gradient_descent(h, cost_fun, theta, x, y, alpha, eps, max_iter=None):
    """Fit a two-parameter model with batch gradient descent.

    Both parameters are updated simultaneously from the same prediction
    errors. Iteration stops as soon as one of the following holds:

    * the cost changes by at most ``eps`` between consecutive steps,
    * the cost cannot be computed (``OverflowError``) or is ``inf``/``nan``,
      i.e. the run diverged,
    * ``max_iter`` updates have been performed (when a limit is given).

    Args:
        h: Hypothesis callable, invoked as ``h(theta, x_i)``.
        cost_fun: Cost callable, invoked as ``cost_fun(h, theta, x, y)``.
        theta: Initial parameters ``[theta_0, theta_1]``.
        x: Sequence of inputs, expected to be as long as ``y``.
        y: Sequence of targets.
        alpha: Learning rate.
        eps: Threshold on the absolute cost change used as stop criterion.
        max_iter: Optional upper bound on the number of updates; ``None``
            (the default) means no bound.

    Returns:
        A tuple ``(theta, history)``. ``history`` is a list of
        ``[cost, theta]`` pairs starting with the initial state; the step
        that triggers termination is not appended, so ``history[-1]``
        describes the state just before the returned ``theta``.
    """
    current_cost = cost_fun(h, theta, x, y)
    # Keep the cost and parameters of every step so they can be plotted later.
    history = [[current_cost, theta]]
    m = len(y)
    iterations = 0
    while max_iter is None or iterations < max_iter:
        iterations += 1
        errors = [h(theta, xi) - yi for xi, yi in zip(x, y)]
        # Simultaneous update: both components use errors of the old theta.
        theta = [
            theta[0] - alpha / float(m) * sum(errors),
            theta[1]
            - alpha / float(m) * sum(err * xi for err, xi in zip(errors, x)),
        ]
        prev_cost = current_cost
        try:
            current_cost = cost_fun(h, theta, x, y)
        except OverflowError:
            # Pure-Python floats raise when squaring a huge error.
            break
        if not math.isfinite(current_cost):
            # NumPy scalars overflow to inf/nan instead of raising; a nan cost
            # would never satisfy the eps test, so stop explicitly.
            break
        if abs(prev_cost - current_cost) <= eps:
            break
        history.append([current_cost, theta])
    return theta, history


# ``__name__`` is "__main__" inside a Jupyter kernel, so everything below still
# runs in the notebook while the definitions above stay importable.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from IPython.display import display, Math, Latex

    # Equivalent of ``%matplotlib inline`` and
    # ``%config InlineBackend.figure_format = "svg"``.
    get_ipython().run_line_magic("matplotlib", "inline")
    get_ipython().run_line_magic("config", 'InlineBackend.figure_format = "svg"')

    data = pd.read_csv("fires_thefts.csv", names=["x", "y"])

    x = data["x"].to_numpy()
    y = data["y"].to_numpy()

    best_theta, history = gradient_descent(
        h, J, [0.0, 0.0], x, y, alpha=0.003, eps=0.000001
    )

    display(
        Math(
            r"\large\textrm{Wynik:}\quad \theta = "
            + LatexMatrix(np.matrix(best_theta).reshape(2, 1))
            + (r" \quad J(\theta) = %.4f" % history[-1][0])
            + r" \quad \textrm{po %d iteracjach}" % len(history)
        )
    )
# Re-run gradient descent for stop thresholds 1, 0.1, ..., 1e-11 and record
# the last logged cost and the number of logged steps for each of them.
epss = [10.0 ** -n for n in range(12)]
alpha = 0.003
costs, lengths = [], []
for eps in epss:
    theta_best, history = gradient_descent(h, J, [0.0, 0.0], x, y, alpha, eps)
    cost, steps = history[-1][0], len(history)
    print(f"{eps=:7}, {cost=:15.3f}, {steps=:6}")
    costs.append(cost)
    lengths.append(steps)


def eps_cost_steps_plot(eps, costs, steps):
    """Plot the step count and the cost against ``eps`` on twin y-axes.

    The step count uses the left axis (green, dashed, square markers) and
    the cost the right axis (orange, dotted, round markers); the shared
    x-axis is logarithmic. The figure is shown immediately.
    """
    _, steps_axis = plt.subplots()
    cost_axis = steps_axis.twinx()

    steps_axis.plot(eps, steps, linestyle="--", marker="s", color="green")
    cost_axis.plot(eps, costs, linestyle=":", marker="o", color="orange")

    steps_axis.set_xscale("log")
    steps_axis.set_xlabel("eps")
    steps_axis.set_ylabel("liczba kroków", color="green")
    cost_axis.set_ylabel("koszt", color="orange")

    plt.show()
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "text/plain": [ + "
# Draw the eps sweep collected earlier.
eps_cost_steps_plot(epss, costs, lengths)

# Predict the number of thefts for a few example fire counts using the
# parameters found by the first training run.
example_x = [50, 100, 200]
print(best_theta)
example_y = [h(best_theta, fires) for fires in example_x]
for fires, thefts in zip(example_x, example_y):
    print(f"Liczba pozarów - {fires} " f"Przewidywalna liczba włamań - {thefts}")