304 lines
87 KiB
Plaintext
304 lines
87 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Algorytm najszybszego spadku dla regresji wielomianowej. \n",
|
|
"Zakładamy, że dysponujemy zbiorem składającym się z dwóch cech (x i y). Modelujemy zależność y od x za pomocą funkcji wielomianowej. Celem projektu jest implementacja metody najszybszego spadku dla tego problemu. Zakładamy kwadratową funkcję straty. Implementacja powinna umożliwiać podanie stopnia wielomianu, który ma być użyty do modelowania. Implementacja powinna zwracać wektor oszacowanych parametrów oraz pokazywać wizualnie zmiany wartości funkcji straty wraz z postępem uczenia."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
"Units in the data set: height is given in inches and weight in pounds."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Import i preprocessing danych"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import random\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np\n",
|
|
"np.set_printoptions(suppress=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Degree of the polynomial fitted to the data.\n",
"degree = 2\n",
"# Fixed seed: the mini-batch / stochastic optimisers shuffle with the `random` module,\n",
"# so seeding it here makes a Restart & Run All reproducible.\n",
"SEED = 42\n",
"random.seed(SEED)\n",
"# Grid on which the fitted curve is drawn (same scale as the max-normalised feature).\n",
"X_plot = np.linspace(0, 1, 1000)\n",
"# All-zero starting parameters as a float column vector of shape (degree + 1, 1).\n",
"initial_theta = np.matrix(np.zeros((degree + 1, 1)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Load the two columns used by the model and drop incomplete rows.\n",
"data = pd.read_csv('weight-height.csv')[[\"Height\", \"Weight\"]].dropna()\n",
"data_matrix = np.matrix(data)\n",
"\n",
"# m = number of samples, n = number of input features (every column except the last).\n",
"m = data_matrix.shape[0]\n",
"n = data_matrix.shape[1] - 1\n",
"raw_features = data_matrix[:, 0:n]\n",
"\n",
"# Design matrix: a column of ones followed by each power of the features,\n",
"# with every power column divided by its own maximum.\n",
"design_columns = [np.ones((m, 1))]\n",
"for power in range(1, degree + 1):\n",
"    powered = np.power(raw_features, power)\n",
"    design_columns.append(powered / np.amax(powered, axis=0))\n",
"\n",
"X = np.matrix(np.concatenate(design_columns, axis=1)).reshape(m, degree * n + 1)\n",
"# Target: the last column of the data.\n",
"Y = np.matrix(data_matrix[:, -1])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Metody do regresji wielomianowej"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
"def polynomial_regression(theta, x):\n",
"    \"\"\"Evaluate the polynomial theta[0] + theta[1] * x + theta[2] * x**2 + ... at x.\n",
"\n",
"    theta may be any array-like of coefficients (column matrix, 1-D array, list),\n",
"    ordered from the constant term upwards. x may be a scalar or an array; the\n",
"    result has the shape of x.\n",
"    \"\"\"\n",
"    # Flatten so a (degree + 1, 1) column matrix and a plain list behave the same.\n",
"    coefficients = np.asarray(theta, dtype=float).ravel()\n",
"    result = np.zeros_like(x, dtype=float)\n",
"    for power, coefficient in enumerate(coefficients):\n",
"        result = result + coefficient * np.power(x, power)\n",
"    return result\n",
|
|
"\n",
|
"def mean_squared_error(theta, X, Y):\n",
"    \"\"\"Return the halved mean squared error of the linear model X @ theta against Y.\n",
"\n",
"    X is the (samples, parameters) design matrix, theta a (parameters, 1) column and\n",
"    Y a (samples, 1) column. The result is a plain Python float.\n",
"    \"\"\"\n",
"    residuals = X @ theta - Y\n",
"    # Normalise by the number of rows actually passed in, not by a notebook global,\n",
"    # so the value is also correct for subsets of the data.\n",
"    return (residuals.T @ residuals).item() / (2.0 * len(Y))\n",
|
|
"\n",
|
"def classic_gradient(theta, X, Y):\n",
"    \"\"\"Return the gradient of the halved mean squared error with respect to theta.\n",
"\n",
"    X and theta are expected to be np.matrix objects so that `*` is a matrix product.\n",
"    \"\"\"\n",
"    residuals = X * theta - Y\n",
"    scale = 1.0 / len(Y)\n",
"    return scale * (X.T * residuals)\n",
|
|
"\n",
|
"# Batch gradient descent (BGD)\n",
"def BGD(X, Y, theta, gradient = classic_gradient, cost_function = mean_squared_error, alpha=0.1, eps=10**-5, max_steps = 10000000000):\n",
"    \"\"\"Minimise cost_function with full-batch gradient descent.\n",
"\n",
"    Each step moves theta by -alpha * gradient(theta, X, Y) computed on all rows.\n",
"    Iteration stops when the cost changes by at most eps between two consecutive\n",
"    steps, when the cost is no longer finite, or after max_steps steps.\n",
"\n",
"    Returns (theta, logs); logs holds one [cost, theta] pair for the starting point\n",
"    and one for every step taken.\n",
"    \"\"\"\n",
"    cost = cost_function(theta, X, Y)\n",
"    logs = [[cost, theta]]\n",
"\n",
"    for _ in range(max_steps):\n",
"        theta = theta - alpha * gradient(theta, X, Y)\n",
"        next_cost = cost_function(theta, X, Y)\n",
"        logs.append([next_cost, theta])\n",
"        # A NaN/inf cost can never satisfy the convergence test, so without this\n",
"        # guard a diverging run (alpha too large) would spin for max_steps iterations.\n",
"        if not np.isfinite(next_cost) or abs(cost - next_cost) <= eps:\n",
"            break\n",
"        cost = next_cost\n",
"    return theta, logs\n",
|
|
"\n",
|
"# Mini-batch gradient descent (MBGD)\n",
"def MBGD(X, Y, theta, gradient = classic_gradient, cost_function = mean_squared_error, alpha=0.1, epochs=5, batch_size=16):\n",
"    \"\"\"Minimise cost_function with mini-batch gradient descent.\n",
"\n",
"    In every epoch the rows of X and Y are shuffled together (using the `random`\n",
"    module) and then visited once in consecutive batches of batch_size rows; the\n",
"    last batch of an epoch may be smaller. After each batch step the cost on the\n",
"    full data set is recorded.\n",
"\n",
"    Returns (theta, logs); logs holds one [cost, theta] pair for the starting point\n",
"    and one for every batch step.\n",
"\n",
"    Raises ValueError if batch_size is smaller than 1.\n",
"    \"\"\"\n",
"    if batch_size < 1:\n",
"        raise ValueError(\"batch_size must be a positive integer\")\n",
"\n",
"    # Use the size of the data actually passed in rather than a notebook global.\n",
"    n_samples = X.shape[0]\n",
"    logs = [[cost_function(theta, X, Y), theta]]\n",
"    row_order = list(range(n_samples))\n",
"\n",
"    for _ in range(epochs):\n",
"        # Shuffle row indices once per epoch and apply the same order to X and Y.\n",
"        random.shuffle(row_order)\n",
"        X_shuffled, Y_shuffled = X[row_order, :], Y[row_order, :]\n",
"        # Walk the whole shuffled set batch by batch; slicing past the end simply\n",
"        # yields the shorter final batch.\n",
"        for start in range(0, n_samples, batch_size):\n",
"            end = start + batch_size\n",
"            theta = theta - alpha * gradient(theta, X_shuffled[start:end, :], Y_shuffled[start:end, :])\n",
"            logs.append([cost_function(theta, X, Y), theta])\n",
"    return theta, logs\n",
|
|
"\n",
|
"# Stochastic gradient descent (SGD)\n",
"def SGD(X, Y, theta, gradient = classic_gradient, cost_function = mean_squared_error, alpha=0.1, epochs=5, batch_size=16):\n",
"    \"\"\"Run MBGD with a batch size of one.\n",
"\n",
"    The batch_size argument is accepted so the signature mirrors MBGD, but it is\n",
"    ignored: every update is delegated to MBGD with batch_size fixed to 1.\n",
"    Returns whatever MBGD returns.\n",
"    \"\"\"\n",
"    return MBGD(\n",
"        X,\n",
"        Y,\n",
"        theta,\n",
"        gradient=gradient,\n",
"        cost_function=cost_function,\n",
"        alpha=alpha,\n",
"        epochs=epochs,\n",
"        batch_size=1,\n",
"    )\n",
|
|
"\n",
|
"# Train with the stochastic variant. The other optimisers accept the same data:\n",
"#   final_theta, logs_1 = BGD(X, Y, initial_theta)\n",
"#   final_theta, logs_2 = MBGD(X, Y, initial_theta, epochs = 30, batch_size = 16)\n",
"final_theta, logs_2 = SGD(X, Y, initial_theta, epochs=30)"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {
|
|
"image.png": {
|
|
"image/png": ""
|
|
}
|
|
},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Metoda gradientu prostego\n",
|
|
"![image.png](attachment:image.png)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Metody do wykresów"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 720x360 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
"def plot_polynomial_regression(theta):\n",
"    \"\"\"Plot the training points and the polynomial described by theta.\n",
"\n",
"    Reads the notebook-level X (design matrix, column 1 is the scaled feature),\n",
"    Y (targets) and X_plot (evaluation grid). theta holds degree + 1 coefficients.\n",
"    \"\"\"\n",
"    fig = plt.figure(figsize=(10,5))\n",
"    chart = fig.add_subplot()\n",
"    Y_plot = polynomial_regression(theta, X_plot).tolist()\n",
"    # Flatten the matrix columns so each series is drawn as one labelled line.\n",
"    chart.plot(np.asarray(X[:,1]).ravel(), np.asarray(Y).ravel(), \"go\", label=\"data\")\n",
"    # theta has one entry per power 0..degree, so the degree is len(theta) - 1.\n",
"    chart.plot(X_plot, Y_plot, color=\"red\", lw=2, label=f\"degree {len(theta) - 1}\")\n",
"    chart.set_xlabel(\"Height (scaled by its maximum)\")\n",
"    chart.set_ylabel(\"Weight\")\n",
"    chart.legend()\n",
"    plt.show()\n",
"\n",
"# To inspect the starting point instead: plot_polynomial_regression(initial_theta)\n",
"plot_polynomial_regression(final_theta)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Wyniki za pomocą gotowej biblioteki"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from sklearn.preprocessing import PolynomialFeatures, StandardScaler\n",
|
|
"from sklearn.pipeline import make_pipeline\n",
|
|
"from sklearn.linear_model import Ridge, LinearRegression"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Pipeline(steps=[('polynomialfeatures', PolynomialFeatures()),\n",
|
|
" ('linearregression', LinearRegression())])"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Reference fit with scikit-learn: polynomial feature expansion followed by\n",
"# ordinary least squares, using the same polynomial degree as above.\n",
"model = make_pipeline(PolynomialFeatures(degree=degree, include_bias=True),\n",
"                      LinearRegression())\n",
"# Pass the target as a plain pandas column: recent scikit-learn releases reject\n",
"# np.matrix inputs such as Y. data was already cleaned with dropna(), so the rows\n",
"# line up with the feature column.\n",
"model.fit(data[[\"Height\"]], data[\"Weight\"])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"2"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
},
|
|
{
|
|
"data": {
|
|
"image/png": "\n",
|
|
"text/plain": [
|
|
"<Figure size 720x360 with 1 Axes>"
|
|
]
|
|
},
|
|
"metadata": {
|
|
"needs_background": "light"
|
|
},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
"# Evaluate the reference model over the observed height range only; the fit says\n",
"# nothing meaningful about heights far outside the data (e.g. negative ones).\n",
"X_plot_2 = np.linspace(data[\"Height\"].min(), data[\"Height\"].max(), 1000)\n",
"# Predict on a frame with the training column name; ravel so the result is 1-D\n",
"# whether the model was fitted on a 1-D or a column-shaped target.\n",
"Y_plot_2 = np.ravel(model.predict(pd.DataFrame({\"Height\": X_plot_2})))\n",
"\n",
"fig = plt.figure(figsize=(10,5))\n",
"chart = fig.add_subplot()\n",
"chart.plot(data[\"Height\"], data[\"Weight\"], \"go\", label=\"data\")\n",
"chart.plot(X_plot_2, Y_plot_2, color=\"red\", lw=2, label=f\"degree {degree}\")\n",
"chart.set_xlabel(\"Height [in]\")\n",
"chart.set_ylabel(\"Weight [lb]\")\n",
"chart.legend()\n",
"plt.show()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|