PolynomialRegression/Polynomial Regression.ipynb

304 lines
87 KiB
Plaintext
Raw Normal View History

2021-06-24 11:07:41 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Algorytm najszybszego spadku dla regresji wielomianowej. \n",
"Zakładamy, że dysponujemy zbiorem składającym się z dwóch cech (x i y). Modelujemy zależność y od x za pomocą funkcji wielomianowej. Celem projektu jest implementacja metody najszybszego spadku dla tego problemu. Zakładamy kwadratową funkcję straty. Implementacja powinna umożliwiać podanie stopnia wielomianu, który ma być użyty do modelowania. Implementacja powinna zwracać wektor oszacowanych parametrów oraz pokazywać wizualnie zmiany wartości funkcji straty wraz z postępem uczenia."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-06-25 01:36:15 +02:00
"Units: Height is given in inches and Weight in pounds."
2021-06-24 11:07:41 +02:00
]
},
{
2021-06-25 01:36:15 +02:00
"cell_type": "markdown",
2021-06-24 11:07:41 +02:00
"metadata": {},
"source": [
2021-06-25 01:36:15 +02:00
"## Import i preprocessing danych"
2021-06-24 11:07:41 +02:00
]
},
{
"cell_type": "code",
2021-06-25 01:36:15 +02:00
"execution_count": 1,
2021-06-24 13:03:10 +02:00
"metadata": {},
"outputs": [],
"source": [
2021-06-25 01:36:15 +02:00
"import pandas as pd\n",
2021-06-25 09:42:03 +02:00
"import random\n",
2021-06-25 01:36:15 +02:00
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"np.set_printoptions(suppress=True)"
2021-06-24 13:03:10 +02:00
]
},
{
"cell_type": "code",
2021-06-25 01:36:15 +02:00
"execution_count": 2,
2021-06-24 11:07:41 +02:00
"metadata": {},
2021-06-25 01:36:15 +02:00
"outputs": [],
2021-06-24 11:07:41 +02:00
"source": [
2021-06-25 01:36:15 +02:00
"degree = 2\n",
"X_plot = np.linspace(0, 1, 1000)\n",
"initial_theta = np.matrix([0] * (degree + 1)).reshape(degree + 1, 1)"
2021-06-24 11:07:41 +02:00
]
},
{
"cell_type": "code",
2021-06-25 01:36:15 +02:00
"execution_count": 3,
2021-06-24 11:07:41 +02:00
"metadata": {},
2021-06-25 01:36:15 +02:00
"outputs": [],
2021-06-24 11:07:41 +02:00
"source": [
2021-06-25 01:36:15 +02:00
"# Load the two columns of interest and drop incomplete rows.\n",
"data = pd.read_csv('weight-height.csv')[[\"Height\", \"Weight\"]].dropna()\n",
"data_matrix = np.matrix(data)\n",
"\n",
"# m = number of samples, n = number of input features (every column but the last).\n",
"m, n_plus_1 = data_matrix.shape\n",
"n = n_plus_1 - 1\n",
"\n",
"# Design matrix: a bias column of ones followed by the features raised to\n",
"# the powers 1..degree, each power scaled by its column maximum.\n",
"columns = [np.ones((m, 1))]\n",
"for i in range(1, degree + 1):\n",
"    Xn = np.power(data_matrix[:, 0:n], i)\n",
"    Xn = Xn / np.amax(Xn, axis=0)\n",
"    columns.append(Xn)\n",
"\n",
"X = np.matrix(np.concatenate(columns, axis=1)).reshape(m, degree * n + 1)\n",
"# Target column: the last column of the data (Weight).\n",
"Y = np.matrix(data_matrix[:, -1])"
2021-06-24 11:07:41 +02:00
]
2021-06-24 13:03:10 +02:00
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Metody do regresji wielomianowej"
]
},
{
"cell_type": "code",
2021-06-25 01:36:15 +02:00
"execution_count": 4,
2021-06-25 09:42:03 +02:00
"metadata": {
"scrolled": true
},
"outputs": [],
2021-06-24 13:03:10 +02:00
"source": [
"def polynomial_regression(theta, x):\n",
2021-06-25 01:36:15 +02:00
" return sum(theta * np.power(x, i) for i, theta in enumerate(theta.tolist()))\n",
2021-06-24 13:03:10 +02:00
"\n",
2021-06-25 01:36:15 +02:00
"def mean_squared_error(theta, X, Y):\n",
" J = 1.0 / (2.0 * m) * ((X * theta - Y).T * (X * theta - Y))\n",
" return J.item()\n",
2021-06-24 13:03:10 +02:00
"\n",
2021-06-25 09:42:03 +02:00
"def classic_gradient(theta, X, Y):\n",
2021-06-25 01:36:15 +02:00
" return 1.0 / len(Y) * (X.T * (X * theta - Y)) \n",
2021-06-24 13:03:10 +02:00
"\n",
2021-06-25 09:42:03 +02:00
"# Batch gradient descent (BGD)\n",
"def BGD(X, Y, theta, gradient = classic_gradient, cost_function = mean_squared_error, alpha=0.1, eps=10**-5, max_steps = 10000000000):\n",
"    \"\"\"Minimise ``cost_function`` with batch gradient descent.\n",
"\n",
"    Every update uses the gradient computed on the full data (X, Y) and a\n",
"    step of size ``alpha``. Iteration stops after ``max_steps`` updates or\n",
"    as soon as one update changes the cost by at most ``eps``.\n",
"\n",
"    Returns (theta, logs), where logs holds a [cost, theta] pair for the\n",
"    starting point and for every update performed.\n",
"    \"\"\"\n",
"    previous_cost = cost_function(theta, X, Y)\n",
"    logs = [[previous_cost, theta]]\n",
"\n",
"    for _ in range(max_steps):\n",
"        theta = theta - alpha * gradient(theta, X, Y)\n",
"        current_cost = cost_function(theta, X, Y)\n",
"        logs.append([current_cost, theta])\n",
"\n",
"        converged = abs(previous_cost - current_cost) <= eps\n",
"        if converged:\n",
"            break\n",
"        previous_cost = current_cost\n",
"    return theta, logs\n",
2021-06-25 09:42:03 +02:00
"\n",
"# Mini-batch gradient descent (MBGD)\n",
"def MBGD(X, Y, theta, gradient = classic_gradient, cost_function = mean_squared_error, alpha=0.1, epochs=5, batch_size=16):\n",
"    \"\"\"Minimise ``cost_function`` with mini-batch gradient descent.\n",
"\n",
"    X, Y: full design matrix and target column, one sample per row.\n",
"    theta: initial parameter column vector.\n",
"    gradient: callable (theta, X_batch, Y_batch) -> gradient.\n",
"    cost_function: callable (theta, X, Y) -> scalar cost.\n",
"    alpha: learning rate.\n",
"    epochs: number of passes over the freshly shuffled data.\n",
"    batch_size: rows used per update; the last batch of an epoch is\n",
"        shorter when the sample count is not a multiple of batch_size.\n",
"\n",
"    Returns (theta, logs), where logs holds a [cost, theta] pair for the\n",
"    starting point and for every update; the cost is always evaluated on\n",
"    the full data set.\n",
"    \"\"\"\n",
"    n_samples = X.shape[0]\n",
"    cost = cost_function(theta, X, Y)\n",
"    logs = [[cost, theta]]\n",
"\n",
"    for _ in range(epochs):\n",
"        # Shuffle the row indices once per epoch and reorder X and Y with\n",
"        # the same permutation so samples and targets stay aligned.\n",
"        order = list(range(n_samples))\n",
"        random.shuffle(order)\n",
"        X_shuffled, Y_shuffled = X[order, :], Y[order, :]\n",
"\n",
"        # Step through the shuffled rows in consecutive windows so that each\n",
"        # update sees a different batch and every row is used once per epoch.\n",
"        for start in range(0, n_samples, batch_size):\n",
"            end = start + batch_size\n",
"            theta = theta - alpha * gradient(theta, X_shuffled[start:end, :], Y_shuffled[start:end, :])\n",
"            cost = cost_function(theta, X, Y)\n",
"            logs.append([cost, theta])\n",
"    return theta, logs\n",
"\n",
"# Stochastic gradient descent (SGD)\n",
"def SGD(X, Y, theta, gradient = classic_gradient, cost_function = mean_squared_error, alpha=0.1, epochs=5, batch_size=16):\n",
"    \"\"\"Stochastic gradient descent: MBGD with one sample per update.\n",
"\n",
"    ``batch_size`` is accepted but ignored; a batch size of 1 is always\n",
"    passed on to MBGD.\n",
"    \"\"\"\n",
"    single_sample = 1\n",
"    return MBGD(X, Y, theta, gradient, cost_function, alpha, epochs, single_sample)\n",
"\n",
2021-06-25 01:36:15 +02:00
"#print(mean_squared_error([1,2,1,1],[1,2,43,1]))\n",
"#mean_squared_error(polynomial_regression(initial_theta, X), Y)\n",
2021-06-25 09:42:03 +02:00
"#final_theta, logs_1 = BGD(X, Y, initial_theta)\n",
"#final_theta, logs_2 = MBGD(X, Y, initial_theta, epochs = 30, batch_size = 16)\n",
"final_theta, logs_2 = SGD(X, Y, initial_theta, epochs = 30)"
2021-06-25 01:36:15 +02:00
]
},
{
"attachments": {
"image.png": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA70AAAFDCAYAAAD22mnsAAAgAElEQVR4Aey9i5MV1b33/fwBp97UqefkVl5m9gCHPSC3Y0TfWEblMigMaAheciByDorUaLQgJuflIFFHAoxIEIgKMUclyiUqCnEorAcPRnDAwQHHh0GMAySMWB7HGzMJMpMw7t9bq7tX9+q1e3ev3rt77758qaJm797d6/JZq7vXb/1u/4vwDwRAAARAAARAAARAAARAAARAAAQSSuB/JbRf6BYIgAAIgAAIgAAIgAAIgAAIgAAIEIReTAIQAAEQAAEQAAEQAAEQAAEQAIHEEoDQm9ihRcdAAARAAARAAARAAARAAARAAAQg9GIOgAAIgAAIgAAIgAAIgAAIgAAIJJYAhN7EDi06BgIgAAIgAAIgAAIgAAIgAAIgAKEXcwAEQAAEQAAEQAAEQAAEQAAEQCCxBCD0JnZo0TEQAAEQAAEQAAEQAAEQAAEQAAEIvZgDIAACIAACIAACIAACIAACIAACiSUAoTexQ4uOgQAIgAAIgAAIgAAIgAAIgAAIQOjFHAABEAABEAABEAABEAABEAABEEgsAQi9iR1adAwEQAAEQAAEQAAEQAAEQAAEQABCL+YACIAACIAACIAACIAACIAACIBAYglA6E3s0KJjIAACIAACIAACIAACIAACIAACEHoxB0AABEAABEAABEAABEAABEAABBJLAEJvYocWHQMBEAABEAABEAABEAABEAABEIDQizkAAiAAAiAAAiAAAiAAAiAAAiCQWAIQehM7tOgYCIAACIAACIAACIAACIAACIAAhF7MARAAARAAARAAARAAARAAARAAgcQSgNCb2KFFx0AABEAABEAABEAABEAABEAABCD0Yg6AAAiAAAiAAAiAAAiAAAiAAAgklgCE3sQOLToGAiAAAiAAAiAAAiAAAiAAAiAAoRdzAARAAARAAARKJHBy+700fUwVZYaPpakNy6i5829mid1vbaFfzJtME8ZUUyaTodrvTqWGppfp+JfmKdRz7DV6YsHNVD9hpHXOz5+l1k+tc/AJBEAABEAABECgOAIQeovjhqtAAARAAARAQCNwdvciGlpTR427j1Pb6mma0Fo9Yg7tOPUB7bznaspcNJUWPPEyHen6C31ybBstHn+Bdk7NDRvpxEAvtaycRNnMKJrV9Dy9eeJT6vnzQfrVnBq9nJHz6dXTAA0CIAACIAACIFAKAQi9pdDDtSAAAiAAAukmkOuk9ePOoxH376F+IvrqQKMmrDKNbnZ4lmrqFlJzlx3R+09NNM7JUv2UicQE5E2CZpid/VWLVc7cFz+zF4BvIAACIAACIAACvghA6PWFCyeDAAiAAAiAgEUg984auiKTpcZ9f9cOWgJtRhNmt35oncs/ffzcTFMwztTUUdMh/Vr+O/srCs+1K9rFn/AZBEAABEAABEDAJwEIvT6B4XQQAAEQAAEQ4AQOP3oJVQ19gPZqLry9tPPH/2Rqcedud9bQsmuYJpj9v3Z1u6Yh5uXxv6eb7zDPgaaXU8FfEAABEAABECiOAITe4rjhKhAAARAAARCgnj/uo/0HT+mC67k2WlKr++tagrAEKddFW27+libQVg+aSptOSL8bX9uWftvzHOcrcRQEQAAEQAAEQEAmAKFXJoLvIAACIAACIFAEgVzn0zSlWo/QPPi2F8gx8HLfblpcdaEm0FaNXkXvONWTO0xrLz9fF3qvfpyOOp2DYyAAAiAAAiAAAsoEIPQqo8KJIAACIAACqgT6/vg03T5mJC167a+ql8T+PNFXd/x6Z1H1q4NNlDVMm7NG8Cu547mO1XSFcc4o+PPKePAdBEAABEAABHwTgNDrGxkuAAEQAAEQKEigp5NeW30jjTSEtvT4o/bT7kX/qGlnma9uU6szITHQlbfPb5aaDjmXg6MgAAIgAAIgAALqBCD0qrPCmSAAAiAAAjKBgX7q/eID6nj1d/Twgu+bwi
4P1JQaoTfXRg9xf94h8+nVv8ig2Hcr0FVBf95zHaZpc9Xo5dQ+oJfT9+eDtL/lODkW61QVjoEACIAACIAACJgEIPSaKPABBEAABEBAlUDb0n8wtZpcwHX6mxahV/TnzUzfSI7xqc610IOmP68l0IrM+w40mqbN4x/jJtK9tK3h61Rzw0Y6JZ6MzyAAAiAAAiAAAkoEIPQqYcJJIAACIAACIgGb0Dt8LE1tuJ+eaX6Xtt/3v23CcFqEXjHFUCF/XtFX19mfVzCRrrmS1h3Rief+9DRNr66mO3ekxz9anGv4DAIgAAIgAAKlEoDQWypBXA8CIAACaSTwZS/1ftFL/Yb5LUdgE4YzGUqL0NvS+DVT2G/c83eOw/b35KYZ5jnO/rzdtG3ON7RzqoYupwMaW2YSfSFlJq2idi0XsK1IfAEBEAABEAABEFAgAKFXARJOAQEQAAEQUCOQSqE310kb6vW8ulUF/XktLW71oBm0qcuZJ48AXfXP/0m7Th6j7Q9eQ9mLptIjh5wFaedScBQEQAAEQAAEQEAkAKFXpIHPIAACIAACJRFIpdBLTEPL8vOOols2O3rzElE/ta28kjKZLE1buqdwQKqBbmpZM5cmjNHz/Y6ecT81FxCQSxooXAwCIAACIAACKSIAoTdFg42uggAIgEDYBNIp9IZNFeWDAAiAAAiAAAiUQgBCbyn0cC0IgAAIgICNAIReGw58AQEQAAEQAAEQiAABCL0RGAQ0AQRAAASSQgBCb1JGMuR+nOmkDQuupSnX/5IOIEBXyLBRPAgkkUA/ta2eRvXXzadNnXiIJHGEg+4ThN6giaI8EAABEEgxAQi9KR581a6f6aC1PxhEmZo6akKALlVqOA8EQEAmcKaNlow7nzIjZtC6DgT7k/Hgu50AhF47D3wDARAAARAogQCE3hLgpeHSM220YvJgLaDX3Bc/TEOP0UcQAIEQCeQ+2Eyzh1VhEy1ExkkpGkJvUkYS/QABEACBCBCA0BuBQYhqE8510RYtynWGrlnRTv1RbSfaBQIgECsCfQcaaUKmmqpHzKGt2EuL1diVs7EQestJG3WBAAiAQMIJQOhN+AAX271zXbStIUuZTIaGzXuBTg0UWxCuAwEQAIF8Aiefm0XZTAaCbz4aHDEIQOjFVAABEAABEAiMAITewFAmqqCTm2ZpAm/1yPn06ulEdQ2dAQEQiASBXtr54wu158zg27CxFokhiVgjIPRGbEDQHBAAARCIMwEIvXEevXDazk0PM5ksNe5BsJlwKKNUEAAB+nwn3cX8ezNwocBsyCcAoTefCY6AAAiAAAgUSQBCb5HgknrZmd20uFbXvgxfsIv+ktR+ol8gAAKRIHC6+Q7NzBmbbJEYjkg1AkJvpIYDjQEBEACBeBOA0Bvv8Qu29f20e9H5ulnz4Dm09eNgS0dpIAACIJBPoJu2zP6m9typGvUA7T2bfwaOpJMAhN50jjt6DQIgAAKhEIiN0JvrpPXjztMWRswULmn/q69+nI5WOFjU2ZZGusJge+36o6HMNxQaPIGefWvoh9fNp+Zio+D27KYlU6fQT5CSKvjBiVqJPS20dtZk+smOYicL0fvPzKL62Y9Ra4BmILmO1Vo0Z/Zcv3pZKyLFR23eVKg9EHorBB7VggAIgEASCcRH6G2jh2ovSJywawnvWfrJK3+t3BQ710FrjU0FaFsqNwx+a+5qvocmZLJUag7lnhaWQiZLN64/CoHD7yDE5PzcBzvpZ+MvKD0au5HKrKZuIe0qXnaWqFlWJpmaOlr3nvQzvqaSAITeVA47Og0CIAAC4RCIjdBLRDzFhSUoShrfmkupac9n1PtFb5n+f0Kdbftp/xvs/x/od79+mFauWkp3z5pC9RNG+hbQa27YSKfCGWbPUj9+bqbZ3pueOeF5Pk6oPAEecCwozdj7T00l5ldZqgBdeTJoQR6BM220ZNz5lLlqObUGYT5s+P7XTFpO7UGUR0S5zqdpenW19hxi0Zw/zesEDq
SNAITetI04+gsCIAACIRKIk9BLZKW4KCT4BrkIKxn7QD/1/k8nHfz9b+nn8ybT2OH6gq5Q2zM1V9K6IyXX6r8AIXhV
}
},
"cell_type": "markdown",
"metadata": {},
"source": [
"Metoda gradientu prostego\n",
"![image.png](attachment:image.png)"
2021-06-24 13:03:10 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Metody do wykresów"
]
},
{
"cell_type": "code",
2021-06-25 09:42:03 +02:00
"execution_count": 5,
2021-06-25 01:36:15 +02:00
"metadata": {
"scrolled": true
},
2021-06-24 13:03:10 +02:00
"outputs": [
{
"data": {
2021-06-25 09:42:03 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAEvCAYAAACHYI+LAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAwL0lEQVR4nO3de3Cc1Znn8d8jWTKSTYQtGwK21QJjSAzmFmHCLQE0gdiJQ2Agk1nBsCE1GiCzC1W7O0lGVcuwU5rKpuZitqYwKDNkGNITQjEQ42AgIAgJhEtkglHMJTa2JBsbbCQiXyRbt7N/nG53S+qWWlJ3v335flJdlt73VeuIN6Cfz3ne55hzTgAAAJi5kqAHAAAAUCgIVgAAAGlCsAIAAEgTghUAAECaEKwAAADShGAFAACQJrOCHoAkLViwwNXW1gY9DAAAgElt2rTpI+fcwkTnciJY1dbWqq2tLehhAAAATMrMOpOdYykQAAAgTQhWAAAAaUKwAgAASBOCFQAAQJoQrAAAANKEYAUAAJAmBCsAAIA0IVgBAIC8F24Pq3ZtrUruKlHt2lqF28OBjCMnGoQCAABMV7g9rMYNjeob7JMkdfZ2qnFDoySpYUVDVsfCjBUAAMhrTa1NR0NVVN9gn5pam7I+FoIVAADIa129XVM6nkkEKwAAkNdqqmqmdDyTCFYAACCvNdc3q7KsctSxyrJKNdc3Z30sBCsAAJDXGlY0qGVNi0JVIZlMoaqQWta0ZL1wXZLMOZf1bzpWXV2da2trC3oYAAAAkzKzTc65ukTnmLECAABIE4IVAABAmhCsAAAA0oRgBQAAkCYEKwAAgDQhWAEAAKQJwQoAACBNCFYAAABpQrACAAA5IdweVu3aWpXcVaLatbUKt4endU2QCFYAACBw4fawGjc0qrO3U05Onb2duvHRG3XbE7dNeE3jhsacClcEKwAAELim1ib1DfaNOubkdG/bvUeDU6Jr+gb7dMOjN+TM7BV7BQIAgMCV3FUip8SZpNRKNeJGkp6PqiyrzMrmy+wVCAAAclpNVU3Sc8NueNJQJfnZq6bWpnQOa8omDVZmtsTMnjezt81si5ndHjn+N2b2vpm9EXmtjvua75rZNjN718yuyuQPAAAA8l9zfbNMNuP36ertSsNopm9WCtcMSfofzrnXzexYSZvM7JnIuX9yzv19/MVmtlzS1yWdIekkSc+a2WnOueF0DhwAAOSucHtYTa1N6urt0vyK+ZKknv4e1VTVaPWy1dq4daO6ertUU1Wj5vpmNaxo0EtdL2ld27oZfd+JZr6yYdIZK+fcHufc65GPD0h6W9KiCb7kakkPOeeOOOd2SNomaWU6BgsAAHLf2Kf3uvu71d3fffRJvnVt6xI+2XfPl+5J6f1DVSH96NofqbKsctTxyrJKNdc3Z+JHStmUaqzMrFbSuZJejRz6SzN708zuN7N5kWOLJO2M+7JdmjiIAQCAApLo6b2JxNdGhapCE14bDU8NKxrUsqZFoaqQTKZQVSgrheuTSTlYmdlcSf8p6Q7n3H5J6yQtlXSOpD2S/iF6aYIvH1dxZmaNZtZmZm379u2b6rgBAECOmk6dU2dvp2rX1mr1stVJa61KrXRUeGpY0aCOOzo0cueIOu7oCDxUSSkGKzMrkw9VYefco5LknPvQOTfsnBuR9APFlvt2SVoS9+WLJe0e+57OuRbnXJ1zrm7hwoUz+RkAAEAOmW6dU3SZMNkTgCNuJCfC00RSeSrQJP2rpLedc/8Yd/zEuMuukfS7yMePS/q6mc02s5MlLZP0WvqGDAAAcllzffO4+qd0SBTYcm2Lm1SeCrxY0o2S2s3sjcixv5b0p2Z2jvwyX4ekv5Ak59wWM3tY0lvyTxR+iycCAQAoHtFZpdufvF3d/d2jzplMV5x8hbb1bFNnb2fK75moMD1aJB+t54oWwsePIdvovA4AADImvu1CfGsFSapdWztpuDLZuK+LSvb1oaqQOu
7oSNvPMG5ME3ReT2XGCgAAYFoaVjQknT1qrm8eNeM0VnVFtT76q4+SvneyIvkgm4SypQ0AAJiRqdQ5xV/b1Nqkm86+SSWWOI4cHjo84fsnK5IPskkoS4EAAGDaxtY5xauuqNbdq+5Ww4oGhdvDSWuuJtsHcOw10c2WJY373tnYiHmipUCCFQAAmLZU6qSk1ALUVETrqCaq4coUaqwAAEBGpFrPlM5QFf99J6rhCgI1VgAAYNqCqmcKerPlZAhWAABgWsLt4XE1U9mQC5stJ0OwAgAAUxYtWj84cDAr3y+6f2CubLacDDVWAABgyppam5L2n0q3UFUoK0Xp6UCwAgAAU5atJpzVFdUZ7aKebiwFAgCAKctW8XgQNVwzQbACAABTctsTt2nn/p1Z+36pdnXPBSwFAgCAlN32xG1a17Yuq98z2oC0s7dTjRsaJSln660IVgAAIKlkW9EEpW+wT02tTQQrAACQu+K3hplfMV9S7tY3ZatwfjoIVgAAFLFEM1K5GqiicrXrukTxOgAARSva5DPXg1S8+K7r4fZwzhW2M2MFAECRymaTz3QotdKjXdejoTA6/lwpbGfGCgCAIjF2hif6tF2+GHEjR0NTolAYLWwPEjNWAAAUuER1VPkWqqTRtVXJCtiDLmxnxgoAgAI2nTqqY0qPyeCIpie+tkpKXsAedGE7wQoAgAI2nTqqw8OHMzSa6YmvrYpqrm9WZVnlqOvGhq8gEKwAAChg+bjkF6+yrFIPXPPAuIL0hhUNalnTolBVSCZTqCo0LnwFgRorAAAKVLg9LJPJyQU9lCkptVINu2GFqkJqrm9OGpYaVjQEHqTGIlgBAFCgmlqb8i5UhapC6rijI+hhTBtLgQAAFJhoW4V8WwYsKykLvEZqppixAgCgQITbw7rlZ7fo4MDBoIcyZXPK5ui+Nffl3NLeVDFjBQBAAQi3h3Xz+pvzMlTdWner7ltzn5pam3Jqe5rpMOeCX3utq6tzbW1tQQ8DAIC8lY9Lf1HVFdXqH+of1RaisqwyJ57yS8TMNjnn6hKdY8YKAIACEHTH8Zno7u/Oye1ppoNgBQBAAQi643gm5GNYJFgBAFAAmuubVV5aHvQw0iofwyLBCgCAPBduD6uptUkDwwNBD2Va5pTNSXh89bLVWR7JzBGsAADIY9FNlvO1cL2spEzHzEq86XPLppa8ezqQYAUAQB6bzibLucTM1N3fnfDcsBtW44bGvApXBCsAAPJYvs5URQ0MD6jUSpOez7enAwlWAADksRLL/1/lw25YlWWVSc/n09OB+X83AAAoANH9/VLpPB691u4yjbiRLI4yM0JVIbWsaUk6c5VPTweyVyAAAAGLFqBHa6U6ezvVuKFRkkZ1Hg+3h3X7k7cnrUnKR5VllWqubz76c8b/c4g/ny+YsQIAIGCJCtDH1hZFw1chharqiupR29Y0rGhQy5oWhapCMtnRmaxc3NYmGWasAAAIWLIaovjj+f70XyL9Q/3jjjWsaMirIDUWM1YAAAQsWQ1R/PF8KuBOVb498ZcKghUAAAFrrm9O+FRcZ2+n7C7Tgu8v0PyK+QGMLPMKLTCyFAgAQMCiS19NrU0J+1IVUl3VWPn0xF8qmLECACAg8S0Wmlqb1FzfrFBVKOhhZUR5Sfm4TaLz7Ym/VEwarMxsiZk9b2Zvm9kWM7s9cny+mT1jZlsjf86L+5rvmtk2M3vXzK7K5A8AAEA+it/jz8kdbbGQ753Uk7n/q/fr/qvvz+sn/lJhzrmJLzA7UdKJzrnXzexYSZskfVXSf5XU45z7npl9R9I859y3zWy5pB9LWinpJEnPSjrNOTec7HvU1dW5tra2dPw8AADkhdq1tQlDVKmVajj5r8y8FaoKqeOOjqCHkRZmtsk5V5fo3KQzVs65Pc651yMfH5D0tqRFkq6W9EDksgfkw5Yixx9yzh1xzu2QtE0+ZAEAgIhkRdvDbnjCvfPyVaEVqSczpRorM6uVdK6kVyWd4JzbI/
nwJen4yGWLJO2M+7JdkWMAACAiWdF2dUW1zCzLo8m8QitSTyblYGVmcyX9p6Q7nHP7J7o0wbFx641m1mhmbWbWtm/
2021-06-24 13:03:10 +02:00
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"def plot_polynomial_regression(theta):\n",
"    \"\"\"Plot the training points together with the polynomial given by ``theta``.\n",
"\n",
"    Reads the notebook globals ``X`` (design matrix, whose column 1 is the\n",
"    first-power feature), ``Y`` (targets) and ``X_plot`` (evaluation grid).\n",
"    \"\"\"\n",
"    fig = plt.figure(figsize=(10,5))\n",
"    Y_plot = polynomial_regression(theta, X_plot).tolist()\n",
"    chart = fig.add_subplot()\n",
"    chart.plot(X[:,1], Y ,\"go\", label=\"training data\")\n",
"    # theta holds degree + 1 coefficients, so the degree is len(theta) - 1.\n",
"    chart.plot(X_plot, Y_plot, color=\"red\", lw=2, label=f\"degree {len(theta) - 1}\")\n",
"    chart.set_xlabel(\"Height (scaled by its maximum)\")\n",
"    chart.set_ylabel(\"Weight\")\n",
"    # The labels passed to plot() are only shown once a legend is drawn.\n",
"    chart.legend()\n",
"    plt.show()\n",
" \n",
2021-06-25 01:36:15 +02:00
"#plot_polynomial_regression(initial_theta)\n",
"plot_polynomial_regression(final_theta)"
2021-06-24 13:03:10 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Wyniki za pomocą gotowej biblioteki"
]
},
{
"cell_type": "code",
2021-06-25 01:36:15 +02:00
"execution_count": 6,
2021-06-24 13:03:10 +02:00
"metadata": {},
"outputs": [],
"source": [
"from sklearn.preprocessing import PolynomialFeatures, StandardScaler\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.linear_model import Ridge, LinearRegression"
]
},
{
"cell_type": "code",
2021-06-25 09:42:03 +02:00
"execution_count": 7,
2021-06-24 13:03:10 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-06-25 01:36:15 +02:00
"Pipeline(steps=[('polynomialfeatures', PolynomialFeatures()),\n",
2021-06-24 13:03:10 +02:00
" ('linearregression', LinearRegression())])"
]
},
2021-06-25 09:42:03 +02:00
"execution_count": 7,
2021-06-24 13:03:10 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = make_pipeline(PolynomialFeatures(degree=degree, include_bias=True),\n",
"                      LinearRegression())\n",
"# Fit against the Weight column itself (the same values Y was built from).\n",
"# NOTE(review): Y is an np.matrix, which newer scikit-learn releases reject\n",
"# as estimator input - confirm against the installed version.\n",
"model.fit(data[[\"Height\"]], data[\"Weight\"])"
2021-06-24 13:03:10 +02:00
]
},
{
"cell_type": "code",
2021-06-25 09:42:03 +02:00
"execution_count": 8,
2021-06-24 13:03:10 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-06-25 01:36:15 +02:00
"2"
2021-06-24 13:03:10 +02:00
]
},
2021-06-25 09:42:03 +02:00
"execution_count": 8,
2021-06-24 13:03:10 +02:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
2021-06-25 01:36:15 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAEvCAYAAACUt89/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtVUlEQVR4nO3deZyVZf3/8dfFLoso4oLsKmSomTUuqbl8Xb9oLqWJS6JZpCjiLojm71siKu4ZKhopOolLmVtWauWSIoIbgpEogigmuAAKAjNcvz+uQzPiIMs5c+4zc17Px8MH5/6cZT5zFzNvrvu6ryvEGJEkSVL9a5J1A5IkSeXC4CVJklQkBi9JkqQiMXhJkiQVicFLkiSpSAxekiRJRdIs6wbWVMeOHWOPHj2ybkOSJGm1Jk2aNC/GuPHK9QYTvHr06MHEiROzbkOSJGm1Qggz66p7qVGSJKlIDF6SJElFYvCSJEkqEoOXJElSkRi8JEmSisTgJUmSVCQGL0mSpCIxeEmSJBWJwUuSJKlIDF6SJKnxq66Gu+6CnXaCefMya8PgJUmSGq8VgWvbbeGYY+CFF+DmmzNrp2DBK4TQNITwUgjh4dxxhxDCYyGEN3J/bljrtUNDCNNDCNNCCAcUqgdJkiTgy4HrX/+CHj3gllvg3HMza6uQI16DgddrHQ8Bnogx9gKeyB0TQugD9AO2AQ4ERoUQmhawD0mSVK6+KnBNmwY/+Qm0aJFZewUJXiGELsBBwK21yocCt+ce3w4cVqs+Lsa4JMY4A5gO7FSIPiRJUpkq8cC1QrMCfc61wHlAu1q1TWOMcwBijHNCCJvk6p2B8bVeNztXkyRJWjvV1XDPPfCLX6SwBSlwDRsGxx9fEmGrtrxHvEIIBwMfxBgnrelb6qjFVXz2gBDCxBDCxLlz565zj5IkqZGproZx42C77Up6hGtlhRjx2g04JITQF2gFrB9CuBP4TwihU260qxPwQe71s4Gutd7fBXivrg+OMY4GRgNUVFTUGc4kSVIZqa6Ge+9NI1yv56aWl/AI18ryHvGKMQ6NMXaJMfYgTZr/W4zxOOBBoH/uZf2BB3KPHwT6hRBahhB6Ar2ACfn2IUmSGrHaI1xHH51CVwMY4VpZoeZ41eUy4J4QwknALOBIgBjjlBDCPcBUoAo4NcZYXY99SJKkhqquEa7u3eHCCxvECNfKQowN4wpeRUVFnDhxYtZtSJKkYmjggSuEMCnGWLFyvT5HvCRJktZOAw9cq2PwkiRJ2WvkgWsFg5ckScpOmQSuFQxekiSp+MoscK1g8JIkScVTpoFrBYOXJEmqf2UeuFYweEmSpPpTXQ333ZcC19Spqda9e1ppvn//sglcKxi8JElS4Rm46mTwkiRJhWPg+koGL0mSlD8D1xoxeEmSpHVn4ForBi9JkrT2qqvhnnvgl7/84l2KBq6vZPCSJElrrroa7r47Ba5//SvVuneHCy6AE04wcK2GwUuSJK1eVRWMGweXXALTpqVajx5phKuM1uHKl8FLkiStWlUV3HVXGuF6441U69kzLXz6ox9B8+bZ9tfAGLwkSdKXVVVBZWUa4Zo+PdW23DKNcB13nIFrHRm8JElSjaoquPPOFLjefDPVttwSLroIjj0Wmhkd8uHZkyRJsGwZ3HEHDB8Ob72Var16pUuKxxxj4CoQz6IkSeVs2TK4/Xa49FKYMSPVevdOI1z9+hm4CsyzKUlSOVq6NAWu4cNh5sxU+9rXagJX06bZ9tdINcm6AUmSVBiVkyvpcW0PmvxfE3pc24PKyZVfftHSpXDzzeky4oABKXRtvTX87ncwZUqax2XoqjeOeEmS1AhUTq5kwEMDWLRsEQAz589kwEMDADh2u2NhyRL47W/TJcV33klv6tMnjXAdeaRhq0gMXpIkNQLDnhj239C1wqJli/i/v1zAsU/PhxEjYPbs9MQ228DPfw5HHAFNvPhVTAYvSZIagVnzZ33huOUyOOklGP
r0LFh4aipuu20KXD/4gYErIwYvSZIaiMrJlQx+dDAfLv7wC/VAIBKBFLh++iIMeQY6L8y9YLvt4OKL4fDDDVwZM3hJklRCKidXMuyJYcycP3ON3xOJtFoGP50E5/+zJnC9shnMP28wewy+2sBVIgxekiSViJUnyK+JVsvgZxNT4Or0aaq9tBn8357wz292YO6Z19ZPs1onBi9JkkrAwEcGcuPEG9f49esthZ9NgvNqBa4XN4P/2wse/BoQgCUf1UOnyofBS5KkjO07dl+emPHEGr229VI4eSKc+0/Y7LNUm9QJ/t9e8HBvUuDKaRpcIqLUGLwkSapnq5oUvzZaL4VTXoBzn4VNc4Hrhc3TJcVHVgpcK1TH6nX+eqofBi9JkgqsEEFrhTZLYOALcM6zsElu6teEzdMI16O9qDNwrdC9ffe8v74Ky+AlSVKBFDpwnZoLXBvnAtf4zmkO15+34isDF0CLpi0Yvs/wvPtQYRm8JEkqgLWdHL8qbZfAaRPg7Geh4+JUe65LGuH665asNnCt0K5Fu7RVkEqKwUuSpDytzeT4VWn3eS5wPQcb5QLXP7umOVyPrUXgWuGjxd7RWIoMXpIkrcaKRU1nzZ9Ft/bd/nsJ74Q/nkDV8qq8Prvd53D683DWc9Dh81R7pmsa4XpiC9Y6cK3QrX23vPpS/cg7eIUQugJjgc2A5cDoGON1IYQOwN1AD+Bt4Icxxo9z7xkKnARUA6fHGP+Sbx+SJNWHgY8M5KaJN/13S56Z82dy3B+Oy/tz1/8cBo+HM8fDhrnA9VS3NIfrbz1Z58AF0Lp5a+d3lahCjHhVAWfHGF8MIbQDJoUQHgNOAJ6IMV4WQhgCDAHODyH0AfoB2wCbA4+HEHrH6D2vkqTSUjm58guhqxDaL4bBz8MZtQLXk93TCNc/erDOgat7++5fGJFzfldpyjt4xRjnAHNyjxeGEF4HOgOHAnvlXnY78A/g/Fx9XIxxCTAjhDAd2Al4Lt9eJEkqpJ899LOCha4NFqewNXg8bLAk1f7eI83herJnfp/dvX133j7j7XxbVBEUdI5XCKEHsAPwPLBpLpQRY5wTQtgk97LOwPhab5udq0mSVBIKdYciwIaLcoHreWifC1x/65EuKT7VI//Pb96kuZcVG5CCBa8QQlvg98AZMcYFIaxyrLSuJ+r850QIYQAwAKBbNycJSpIKa+VJ822at2HqvKkF+ewNF6UJ86c/D+svTbXHe6bA9Uwe65q2bNqSJdUpwW203kZc97/XeVmxASlI8AohNCeFrsoY4x9y5f+EEDrlRrs6AR/k6rOBrrXe3gV4r67PjTGOBkYDVFRUFO4CuySp7FVOrmTAQwNYtCytTjpz/syCfG6HXOAaVCtwPbZFuqT4zzwClyGrcSjEXY0B+A3weozx6lpPPQj0By7L/flArfrvQghXkybX9wIm5NuHJElfZeXRrU+Xfvrf0FUIG32W1uA6bQK0ywWuv2yZAtdzeVy0adGkBUsuWlKYJpW5Qox47Qb8CJgcQng5V7uAFLjuCSGcBMwCjgSIMU4JIdwDTCXdEXmqdzRKkupTfY1uAXT8LK0yf9oEaLss1f68ZbqkOL7rV71z9ZqGpow5bEy+LaqEhBgbxhW8ioqKOHHixKzbkCQ1QD2u7VHQsAUpcJ3zLJxaK3D9aSv4xZ7wfJ6BC6BN8zbc/L2bvbTYQIUQJsUYK1auu3K9JKlRq5xcWdDQtcmncO4/4ZSJ0CYXuB7ulQLXC13y/3zncjVuBi9JUqOzYj7XzPkzCfksAV9LpwVw7rNw8kRYL7dL0EO9U+CaWKBFke78/p0GrkbO4CVJalRW3uIn3wVQO8+H8/8JP50ErXIzku/fGn65B7y0eb7d1tin5z6GrjJg8JIkNRqVkysLtvBp109gyDNw0kvQMhe47vs6XLIHvNKpIF8CgEDg5IqTGXXQqMJ9qEqWwUuS1Gic/PDJeX9G94/hgqfhhJehxXJYDozbJgWuKZvm/f
E1X8dtfsqSwUuS1OAVYoufLT5Kgev4V6D5cqgOULkdDP8uvL7J6t+/Nlo3b+02P2XK4CVJatDyDV295qXAddyr0Cy
2021-06-24 13:03:10 +02:00
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
2021-06-25 01:36:15 +02:00
"X_plot_2 = np.linspace(-50, 100, 1000)\n",
"# Predict from a frame that carries the same column name the pipeline was\n",
"# fitted on, built in one step instead of a Python-level list of rows.\n",
"Y_plot_2 = model.predict(pd.DataFrame({\"Height\": X_plot_2}))\n",
"\n",
"fig = plt.figure(figsize=(10,5))\n",
"chart = fig.add_subplot()\n",
"chart.plot(data[\"Height\"], Y ,\"go\", label=\"training data\")\n",
"chart.plot(X_plot_2, Y_plot_2, color=\"red\", lw=2, label=f\"degree {degree}\")\n",
"chart.set_xlabel(\"Height\")\n",
"chart.set_ylabel(\"Weight\")\n",
"# The labels passed to plot() are only shown once a legend is drawn.\n",
"chart.legend()\n",
"degree"
]
2021-06-24 11:07:41 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}