{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "\n", "%matplotlib inline\n", "%config InlineBackend.figure_format = \"svg\"\n", "\n", "from IPython.display import display, Math, Latex\n", "\n", "data = pd.read_csv(\"fires_thefts.csv\", names=[\"x\", \"y\"])\n", "\n", "x = data[\"x\"].to_numpy()\n", "y = data[\"y\"].to_numpy()\n", "\n", "# Hipoteza: funkcja liniowa jednej zmiennej\n", "def h(theta, x):\n", " return theta[0] + theta[1] * x\n", "\n", "# Funkcja kosztu\n", "def J(h, theta, x, y):\n", " m = len(y)\n", " return 1.0 / (2 * m) * sum((h(theta, x[i]) - y[i]) ** 2 for i in range(m))\n", "\n", "# Wyświetlanie macierzy w LaTeX-u\n", "def LatexMatrix(matrix):\n", " ltx = r\"\\left[\\begin{array}\"\n", " m, n = matrix.shape\n", " ltx += \"{\" + (\"r\" * n) + \"}\"\n", " for i in range(m):\n", " ltx += r\" & \".join([(\"%.4f\" % j.item()) for j in matrix[i]]) + r\" \\\\ \"\n", " ltx += r\"\\end{array}\\right]\"\n", " return ltx\n", "\n", "def gradient_descent(h, cost_fun, theta, x, y, alpha, eps):\n", " current_cost = cost_fun(h, theta, x, y)\n", " history = [\n", " [current_cost, theta]\n", " ] # zapiszmy wartości kosztu i parametrów, by potem zrobić wykres\n", " m = len(y)\n", " while True:\n", " new_theta = [\n", " theta[0] - alpha / float(m) * sum(h(theta, x[i]) - y[i] for i in range(m)),\n", " theta[1]\n", " - alpha / float(m) * sum((h(theta, x[i]) - y[i]) * x[i] for i in range(m)),\n", " ]\n", " theta = new_theta # jednoczesna aktualizacja - używamy zmiennej tymczasowej\n", " try:\n", " prev_cost = current_cost\n", " current_cost = cost_fun(h, theta, x, y)\n", " except OverflowError:\n", " break\n", " if abs(prev_cost - current_cost) <= eps:\n", " break\n", " history.append([current_cost, theta])\n", " return theta, history\n", "\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/latex": [ "$\\displaystyle \\large\\textrm{Wynik:}\\quad \\theta = \\left[\\begin{array}{r}16.9446 \\\\ 1.3160 \\\\ \\end{array}\\right] \\quad J(\\theta) = 180.4105 \\quad \\textrm{po 5369 iteracjach}$" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "best_theta, history = gradient_descent(h, J, [0.0, 0.0], x, y, alpha=0.003, eps=0.000001)\n", "\n", "display(\n", " Math(\n", " r\"\\large\\textrm{Wynik:}\\quad \\theta = \"\n", " + LatexMatrix(np.matrix(best_theta).reshape(2, 1))\n", " + (r\" \\quad J(\\theta) = %.4f\" % history[-1][0])\n", " + r\" \\quad \\textrm{po %d iteracjach}\" % len(history)\n", " )\n", ")\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "eps= 1.0, cost= 231.741, steps= 4\n", "eps= 0.1, cost= 226.569, steps= 52\n", "eps= 0.01, cost= 185.031, steps= 1115\n", "eps= 0.001, cost= 180.872, steps= 2179\n", "eps= 0.0001, cost= 180.456, steps= 3242\n", "eps= 1e-05, cost= 180.415, steps= 4306\n", "eps= 1e-06, cost= 180.411, steps= 5369\n", "eps= 1e-07, cost= 180.410, steps= 6433\n", "eps= 1e-08, cost= 180.410, steps= 7496\n", "eps= 1e-09, cost= 180.410, steps= 8560\n", "eps= 1e-10, cost= 180.410, steps= 9623\n", "eps= 1e-11, cost= 180.410, steps= 10687\n" ] } ], "source": [ "\n", "epss = [10.0**(-n) for n in range(0, 12)]\n", "alpha=0.003\n", "costs = []\n", "lengths = []\n", "for eps in epss:\n", " theta_best, history = gradient_descent(\n", " h, J, [0.0, 0.0], x, y, alpha, eps)\n", " cost = history[-1][0]\n", " steps = len(history)\n", " print(f\"{eps=:7}, {cost=:15.3f}, {steps=:6}\")\n", " costs.append(cost)\n", " lengths.append(steps)\n", "\n" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "def eps_cost_steps_plot(eps, costs, steps):\n", " \"\"\"Wykres kosztu i liczby kroków w zależności od eps\"\"\"\n", " fig, ax1 = plt.subplots()\n", " ax2 = ax1.twinx()\n", " ax1.plot(eps, steps, \"--s\", color=\"green\")\n", " ax2.plot(eps, costs, \":o\", color=\"orange\")\n", " ax1.set_xscale(\"log\")\n", " ax1.set_xlabel(\"eps\")\n", " ax1.set_ylabel(\"liczba kroków\", color=\"green\")\n", " ax2.set_ylabel(\"koszt\", color=\"orange\")\n", " plt.show()\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": "\n\n\n \n \n \n \n 2022-10-31T17:08:50.563047\n image/svg+xml\n \n \n Matplotlib v3.6.1, https://matplotlib.org/\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "eps_cost_steps_plot(epss, costs, lengths)\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[16.835521154474677, 1.3214970549417684]\n", "Liczba pozarów - 50 Przewidywalna liczba włamań - 82.91037390156309\n", "Liczba pozarów - 100 Przewidywalna liczba włamań - 148.98522664865152\n", "Liczba pozarów - 200 Przewidywalna liczba włamań - 281.13493214282835\n" ] } ], "source": [ "example_x = [50, 100, 200]\n", "print(best_theta)\n", "example_y = [h(best_theta, ex) for ex in example_x]\n", "for i in range(3):\n", " print(f\"Liczba pozarów - {example_x[i]} \"\n", " f\"Przewidywalna liczba włamań - {example_y[i]}\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.8 ('venv': venv)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "f37486876ad4b243625dcab03485f0edb2a22cb7fa9db711ceb1161e85adf5f1" } } }, "nbformat": 4, "nbformat_minor": 2 }