"""Fit and evaluate a linear regression on vaccination data.

Reads ``test.csv``, fits ``daily_vaccinations`` as a linear function of
``total_vaccinations`` on an 80/20 train/test split, and appends the test-set
RMSE together with a build identifier to ``evaluation.csv``.

Usage:
    python <this script> [BUILD_NO]

``BUILD_NO`` is optional; when it is omitted the build is recorded as ``0``.
"""
import sys

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

reg = LinearRegression()

# 'people_fully_vaccinated' is never used as a feature or target below, but
# because it is loaded here, dropna() also discards rows where it is missing.
alldata = pd.read_csv(
    'test.csv',
    header=0,
    usecols=['total_vaccinations', 'daily_vaccinations', 'people_fully_vaccinated'],
).dropna()

# scikit-learn expects 2-D arrays of shape (n_samples, n_features).
X = alldata['total_vaccinations'].to_numpy().reshape(-1, 1)
y = alldata['daily_vaccinations'].to_numpy().reshape(-1, 1)

# NOTE: no random_state is given, so the split (and therefore the reported
# metrics) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

lin_reg = reg.fit(X_train, y_train)
score = lin_reg.score(X_test, y_test)
prediction = lin_reg.predict(X_test)

# First command-line argument identifies the build; fall back to 0.
build_no = sys.argv[1] if len(sys.argv) > 1 else 0

# Take the square root of the MSE explicitly instead of passing
# squared=False: that keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6. For a single target column both forms give the same value.
rmse = mean_squared_error(y_test, prediction) ** 0.5

data = {"rmse": [round(rmse, 2)], "build": [build_no]}
df = pd.DataFrame(data=data)

# newline="" lets pandas control line endings itself; without it, text-mode
# newline translation produces "\r\r\n" row terminators on Windows.
with open("evaluation.csv", "a", newline="") as f:
    # In append mode the position starts at end-of-file, so tell() == 0
    # means the file is empty and still needs its header row.
    df.to_csv(f, header=f.tell() == 0, index=False)

print("RMSE:", rmse)
print("Score:", score)