import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the dataset: number of fires (feature) and number of thefts (target).
data = pd.read_csv('fires_thefts.csv', names=['fires', 'thefts'])
x = data['fires'].to_numpy()
y = data['thefts'].to_numpy()
def gradient_descent(h, cost_fun, theta, x, y, alpha, eps, max_steps=1000000):
    """Batch gradient descent for simple linear regression.

    Stops when the cost decrease between two steps drops below eps,
    or after max_steps iterations.
    """
    current_cost = cost_fun(h, theta, x, y)
    log = [[current_cost, theta]]
    m = len(y)
    steps_counter = 0
    while steps_counter < max_steps:
        steps_counter += 1
        # Simultaneous update of both parameters:
        # theta_j := theta_j - (alpha / m) * sum_i (h(theta, x_i) - y_i) * x_i^(j)
        new_theta = [
            theta[0] - alpha / float(m) * sum(h(theta, x[i]) - y[i]
                                              for i in range(m)),
            theta[1] - alpha / float(m) * sum((h(theta, x[i]) - y[i]) * x[i]
                                              for i in range(m))]
        theta = new_theta
        prev_cost = current_cost
        current_cost = cost_fun(h, theta, x, y)
        # Convergence test: stop once the cost barely changes any more.
        if abs(prev_cost - current_cost) <= eps:
            break
        log.append([current_cost, theta])
    return theta, log
def J(h, theta, x, y):
    """Cost function: halved mean squared error of hypothesis h with parameters theta."""
    m = len(y)
    return 1.0 / (2 * m) * sum((h(theta, x[i]) - y[i])**2 for i in range(m))

def h(theta, x):
    """Linear hypothesis: h(theta, x) = theta_0 + theta_1 * x."""
    return theta[0] + theta[1] * x
def mse(expected, predicted):
    """Halved mean squared error between two equally long sequences."""
    m = len(expected)
    if len(predicted) != m:
        raise ValueError('The vectors have different lengths!')
    return 1.0 / (2 * m) * sum((expected[i] - predicted[i])**2 for i in range(m))
# Fit the model, then predict the number of thefts for 50, 100 and 200 fires.
best_theta, log = gradient_descent(h, J, [0.0, 0.0], x, y,
                                   alpha=0.001, eps=0.0000001, max_steps=200)
predicted_50 = h(best_theta, 50)
predicted_100 = h(best_theta, 100)
predicted_200 = h(best_theta, 200)
print(f'Predicted number of thefts for 50 fires: {predicted_50}')
print(f'Predicted number of thefts for 100 fires: {predicted_100}')
print(f'Predicted number of thefts for 200 fires: {predicted_200}')
Predicted number of thefts for 50 fires: 100.5454538681846
Predicted number of thefts for 100 fires: 195.86844057898603
Predicted number of thefts for 200 fires: 386.5144140005889
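As a possible follow-up (not part of the original run), the mse helper defined above, together with matplotlib (already imported as plt), can be used to sanity-check the fit on the training data and to visualize the regression line. This is only a sketch that assumes the script above has been run, so that x, y, h, mse and best_theta are available.

# Sketch: evaluate the fit on the training data and plot the regression line.
# Assumes x, y, h, mse and best_theta from the script above are in scope.
train_predictions = [h(best_theta, xi) for xi in x]
print(f'Training (halved) MSE: {mse(y, train_predictions)}')

plt.scatter(x, y, label='data')
x_line = [min(x), max(x)]
plt.plot(x_line, [h(best_theta, xi) for xi in x_line], color='red', label='fitted line')
plt.xlabel('fires')
plt.ylabel('thefts')
plt.legend()
plt.show()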