# Source listing metadata: Python, 21 lines, 824 B
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# NOTE: despite the alias, this is the plain mean squared error (MSE); the
# square root is taken explicitly where the RMSE is reported below.
from sklearn.metrics import mean_squared_error as rmse

# Fit a linear regression that predicts `daily_vaccinations` from the other
# vaccination columns, then report the RMSE and the model's score on a
# held-out test split.
reg = LinearRegression()

# Load only the columns the model uses; rows with a missing value in any of
# them are dropped so the estimator never sees NaN.
alldata = pd.read_csv(
    'country_vaccinations.csv',
    header=0,
    skipinitialspace=True,
    usecols=[
        'total_vaccinations',
        'people_vaccinated',
        'daily_vaccinations',
        'daily_vaccinations_per_million',
    ],
).dropna()

# Features are every loaded column except the target.
X = alldata.drop(columns='daily_vaccinations')
y = alldata['daily_vaccinations']

# Fixed random_state keeps the 80/20 split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=6
)

lin_reg = reg.fit(X_train, y_train)
score = lin_reg.score(X_test, y_test)
prediction = lin_reg.predict(X_test)

# RMSE is computed as sqrt(MSE) instead of passing `squared=False`: that
# keyword was deprecated in scikit-learn 1.4 and removed in 1.6, while the
# explicit square root works on every version.
print("RMSE:", np.sqrt(rmse(y_test, prediction)))
print("Score:", score)