2021-05-10 21:11:07 +02:00
|
|
|
import pandas as pd
|
2021-05-13 22:27:41 +02:00
|
|
|
import sys
|
2021-05-10 21:11:07 +02:00
|
|
|
from sklearn.linear_model import LinearRegression
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
from sklearn.metrics import mean_squared_error as rmse
|
|
|
|
# Linear regression estimator; it is fitted on the training split further down.
reg = LinearRegression()
|
|
|
|
|
|
|
|
# Read only the three vaccination columns from the CSV (first row is the
# header), then discard every row with a missing value in any of them.
# 'people_fully_vaccinated' is not used for X or y here, but it still takes
# part in the dropna() filtering.
alldata = pd.read_csv(
    'test.csv',
    header=0,
    usecols=['total_vaccinations', 'daily_vaccinations', 'people_fully_vaccinated'],
)
alldata = alldata.dropna()

# Turn the feature and the target into (n_samples, 1) column arrays.
X = alldata['total_vaccinations'].to_numpy().reshape(-1, 1)
y = alldata['daily_vaccinations'].to_numpy().reshape(-1, 1)
|
2021-05-10 21:11:07 +02:00
|
|
|
|
2021-05-14 21:25:04 +02:00
|
|
|
# Hold out 20% of the samples for evaluation; the rest is used for fitting.
# No random_state is given, so the split differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train on the training split only and keep the fitted model under lin_reg.
lin_reg = reg.fit(X_train, y_train)

# Evaluate on the held-out split: raw predictions and the estimator's own
# score (R^2 for LinearRegression).
prediction = lin_reg.predict(X_test)
score = lin_reg.score(X_test, y_test)
|
2021-05-13 22:27:41 +02:00
|
|
|
|
|
|
|
# Build identifier: first command-line argument when given, otherwise 0.
build_no = sys.argv[1] if len(sys.argv) > 1 else 0

# `rmse` is this file's import alias for sklearn's mean_squared_error, so the
# call below returns the plain MSE. Taking the square root here instead of
# passing `squared=False` keeps the script working on scikit-learn >= 1.6,
# where that parameter no longer exists. The result is stored under its own
# name so the imported function is not overwritten by a float.
rmse_value = rmse(y_test, prediction) ** 0.5

# One-row frame describing this run: rounded RMSE plus the build identifier.
data = {"rmse": [round(rmse_value, 2)], "build": [build_no]}
df = pd.DataFrame(data=data)

# Append the row to the running evaluation log. The header is written only
# when the file is still empty (append mode starts at the end of the file, so
# a position of 0 means nothing has been written yet). newline="" lets pandas
# control the line terminator and avoids doubled line endings on Windows.
with open("evaluation.csv", "a", newline="") as f:
    df.to_csv(f, header=f.tell() == 0, index=False)

print("RMSE:", rmse_value)
print("Score:", score)
|