# ml-2023SZ/zad3.py

import numpy as np
import matplotlib.pyplot as plt
import csv

# Load the training set from "fires_thefts.csv": every row holds two numeric
# columns, collected into the parallel lists ``x`` (first column) and ``y``
# (second column).  Presumably these are fire and theft counts, judging by the
# file name and the prediction messages further down -- confirm with the data.
x = []
y = []
# The context manager guarantees the file is closed even if parsing fails;
# newline="" is what the csv module expects from the file objects it reads.
with open("fires_thefts.csv", newline="") as csv_file:
    data = csv.reader(csv_file, delimiter=',')
    for row in data:
        if not row:
            # csv.reader yields an empty list for a blank line (e.g. a
            # trailing newline at the end of the file); skip it instead of
            # failing on the unpacking below.
            continue
        xi, yi = row
        x.append(float(xi))
        y.append(float(yi))


def h(theta, x):
    """Evaluate the linear hypothesis ``theta[0] + theta[1] * x``.

    ``theta`` is indexed at positions 0 (intercept) and 1 (slope); ``x`` is
    the input the model is evaluated at.
    """
    intercept = theta[0]
    slope = theta[1]
    return intercept + slope * x


def J(h, theta, x, y):
    """Cost function: half of the mean squared error of ``h`` on the data.

    Computes ``1 / (2 * m) * sum((h(theta, x[i]) - y[i]) ** 2)`` over the
    first ``m = len(y)`` samples.

    Args:
        h: hypothesis, called as ``h(theta, x_i)``.
        theta: model parameters passed through to ``h``.
        x: sequence of inputs, indexed from 0 to ``len(y) - 1``.
        y: sequence of target values.

    Returns:
        The cost as a float.

    Raises:
        ZeroDivisionError: if ``y`` is empty.
    """
    sample_count = len(y)

    def squared_error(index):
        residual = h(theta, x[index]) - y[index]
        return residual ** 2

    scale = 1.0 / (2 * sample_count)
    return scale * sum(map(squared_error, range(sample_count)))


def gradient_descent(h, cost_fun, theta, x, y, alpha, eps):
    """Fit a two-parameter model with batch gradient descent.

    Starting from ``theta``, both parameters are updated simultaneously from
    the residuals ``h(theta, x[i]) - y[i]`` until the cost changes by at most
    ``eps`` between two consecutive steps, or the run is detected as diverged.

    Args:
        h: hypothesis, called as ``h(theta, x_i)``.
        cost_fun: cost function, called as ``cost_fun(h, theta, x, y)``.
        theta: initial parameters; positions 0 and 1 are used.
        x: sequence of inputs, indexed from 0 to ``len(y) - 1``.
        y: sequence of target values.
        alpha: learning rate.
        eps: stop once ``abs(previous_cost - current_cost) <= eps``.

    Returns:
        ``(theta, history)``: ``theta`` is the parameter list after the last
        update performed, and ``history`` is a list of ``[cost, theta]``
        entries, one for the starting point and one per step that did not
        end the loop.  The step that triggers a stop is therefore returned
        as ``theta`` but is not appended to ``history``.

    Divergence handling: the loop stops when evaluating the cost raises
    ``OverflowError`` or yields a non-finite value (inf/NaN).  Without the
    non-finite check a NaN cost would never satisfy the ``eps`` test and the
    loop would never terminate.
    """
    import math  # local import keeps the function self-contained

    current_cost = cost_fun(h, theta, x, y)
    # Keep the cost and parameters of every step so they can be plotted later.
    history = [[current_cost, theta]]
    m = len(y)
    step = alpha / float(m)
    while True:
        # Residuals are computed once, from the old theta, and shared by both
        # partial derivatives; this is what makes the update simultaneous.
        residuals = [h(theta, x[i]) - y[i] for i in range(m)]
        theta = [
            theta[0] - step * sum(residuals),
            theta[1] - step * sum(r * x[i] for i, r in enumerate(residuals)),
        ]
        prev_cost = current_cost
        try:
            current_cost = cost_fun(h, theta, x, y)
        except OverflowError:
            # Squaring a huge residual overflowed: the run has diverged.
            break
        if not math.isfinite(current_cost):
            # inf/NaN cost also means divergence; NaN in particular would
            # make the convergence test below false forever.
            break
        if abs(prev_cost - current_cost) <= eps:
            break
        history.append([current_cost, theta])
    return theta, history


# Main run: learning rate and stopping tolerance for gradient descent,
# starting from theta = [0, 0].
alfa = 0.001
eps = 1e-05
best_theta, history = gradient_descent(h, J, [0.0, 0.0], x, y, alfa, eps)

# Report the fitted parameters, the last recorded cost and the number of
# history entries (printed as the iteration count).
final_cost = history[-1][0]
iterations = len(history)
print(
    "Wynik = ",
    np.matrix(best_theta),
    ", J(θ) = %.4f" % final_cost,
    "po %d iteracjach" % iterations,
)
print("Wynik dla eps = ", float(eps), ", α = ", alfa)

# Predictions of the fitted model for three input values.
for message, fires in (
    ("Predykcja dla 50 pożarów: ", 50),
    ("Predykcja dla 100 pożarów: ", 100),
    ("Predykcja dla 200 pożarów: ", 200),
):
    print(message, h(best_theta, fires))

# Re-run gradient descent with three other learning rates to compare how the
# cost evolves.  NOTE: the variable suffixes do not match the learning rates
# (history_1 -> 0.01, history_01 -> 0.025, history_0076 -> 0.0076); the names
# are kept as they were, the plot labels below carry the real values.
best_theta, history_1 = gradient_descent(h, J, [0.0, 0.0], x, y, 0.01, eps)
best_theta, history_01 = gradient_descent(h, J, [0.0, 0.0], x, y, 0.025, eps)
best_theta, history_0076 = gradient_descent(h, J, [0.0, 0.0], x, y, 0.0076, eps)


fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1)
ax.set_ylabel('Wartość funkcji kosztu J(θ)')
ax.set_xlabel('Ilość iteracji')
ax.set_title("Wykres zależności między kosztem a różnymi wartościami α")

# Plot at most the first 201 recorded costs of every run.  Slicing (instead
# of indexing 0..200 unconditionally) avoids an IndexError when a run stopped
# before recording that many entries.
max_points = 201
for run_history, color, marker, label in (
    (history_1, 'brown', "x", '0.01'),
    (history_01, 'purple', "o", '0.025'),
    (history_0076, 'orange', "^", '0.0076'),
):
    costs = [entry[0] for entry in run_history[:max_points]]
    ax.plot(np.arange(len(costs)), costs, color=color, marker=marker, label=label)

ax.legend()
plt.axis([0, 180, 0, 10**10])
plt.show()