Karolina Oparczyk
3e23841578
All checks were successful
s434765-training/pipeline/head This commit looks good
54 lines
1.9 KiB
Python
54 lines
1.9 KiB
Python
import pandas as pd
|
|
import numpy as np
|
|
from sklearn.metrics import mean_squared_error
|
|
from tensorflow import keras
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
def _read_dev_frame():
    """Load the first 527 rows of ``data_dev`` and drop rows with missing values."""
    column_names = [
        "video_id", "last_trending_date",
        "publish_date", "publish_hour", "category_id",
        "channel_title", "views", "likes", "dislikes",
        "comment_count",
    ]
    common = dict(sep=',', skip_blank_lines=True, nrows=527, names=column_names)
    try:
        # Current spelling of "skip malformed rows instead of raising".
        frame = pd.read_csv("data_dev", on_bad_lines="skip", **common)
    except TypeError:
        # Older pandas releases only accept the original keyword.
        frame = pd.read_csv("data_dev", error_bad_lines=False, **common)
    return frame.dropna()


def _plot_score_history(scores):
    """Draw the recorded scores against their evaluation index into ``evaluation.png``."""
    fig, ax = plt.subplots()
    ax.plot(range(len(scores)), scores)
    ax.set_ylabel('RMSE')
    ax.set_xlabel('evaluation no')
    # Lay out after the labels exist so they are taken into account.
    fig.tight_layout()
    fig.savefig('evaluation.png')
    # Release the figure so repeated evaluations do not accumulate curves.
    plt.close(fig)


def evaluate_model() -> float:
    """Evaluate the saved model on the dev data and record the result.

    Steps:
      1. Load the keras model stored under ``model`` and the dev rows from
         ``data_dev``.
      2. Min-max scale ``views`` using the dev set's own minimum and maximum,
         predict, and map the predictions back to the ``likes`` range (again
         using the dev set's own minimum and maximum).
      3. Write every ``predicted / expected`` pair to ``predictions.txt``.
      4. Append the error to ``rmse.txt`` and redraw ``evaluation.png`` from
         the whole history stored in that file.

    Returns:
        The value of ``sklearn.metrics.mean_squared_error`` between the
        expected and the denormalised predicted likes.

    Raises:
        ValueError: if every ``views`` value is identical, because min-max
            scaling is undefined in that case.
    """
    model = keras.models.load_model('model')
    data = _read_dev_frame()

    # Keep the features two-dimensional (one column), as before.
    X_test = data[["views"]].astype(int)
    likes = data["likes"].astype(int)

    # Read the bounds as plain scalars straight from the columns so that no
    # positional indexing into a reduction result is needed later.
    min_val_sub = int(X_test["views"].min())
    max_val_sub = int(X_test["views"].max())
    print(min_val_sub)
    print(max_val_sub)

    views_span = max_val_sub - min_val_sub
    if views_span == 0:
        raise ValueError("cannot min-max scale 'views': all values are equal")
    X_test = (X_test - min_val_sub) / views_span

    min_val_like = int(likes.min())
    max_val_like = int(likes.max())
    print(min_val_like)
    print(max_val_like)

    prediction = model.predict(X_test)

    # Undo the min-max scaling of the target for the first output column.
    likes_span = max_val_like - min_val_like
    prediction_denormalized = (
        np.asarray(prediction, dtype=float)[:, 0] * likes_span + min_val_like
    )

    expected_likes = likes.to_numpy()
    # The context manager guarantees the report is flushed and closed.
    with open("predictions.txt", "w") as report:
        report.writelines(
            "predicted: %s expected: %s\n" % (str(pred), str(expected))
            for pred, expected in zip(prediction_denormalized, expected_likes)
        )

    # NOTE(review): this is the mean squared error, while the history file and
    # the plot label call it RMSE. Left unchanged so the returned value and
    # the existing history stay comparable - confirm which metric is intended.
    error = mean_squared_error(expected_likes, prediction_denormalized)
    print(error)

    with open("rmse.txt", "a") as history:
        history.write(str(error) + "\n")

    with open("rmse.txt", "r") as history:
        # Parse whole lines as numbers; slicing characters off the end would
        # drop a digit, and plotting strings yields a categorical axis.
        scores = [float(line) for line in history if line.strip()]

    _plot_score_history(scores)
    return error
|