2024-04-14 21:59:34 +02:00
|
|
|
import pickle
|
|
|
|
import pandas as pd
|
|
|
|
import numpy as np
|
|
|
|
from sklearn.preprocessing import StandardScaler
|
2024-05-09 01:56:58 +02:00
|
|
|
from sklearn.metrics import mean_squared_error, f1_score, accuracy_score
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
def calculate_metrics(result, *, build_number=None, filename='metrics_df.csv'):
    """Compute evaluation metrics and append them to the metrics history CSV.

    RMSE, macro-averaged F1 and accuracy are computed from the "Real" and
    "Predictions" columns of *result*. A single row holding the build number
    and the three metrics is appended to *filename*; the file is created when
    it does not exist yet.

    Args:
        result: DataFrame with "Real" and "Predictions" columns.
        build_number: Identifier stored in the "Build number" column. When
            omitted, the first command-line argument (``sys.argv[1]``) is
            used, which is the original behaviour.
        filename: Path of the CSV file that accumulates one row per call.

    Raises:
        IndexError: If *build_number* is omitted and no command-line
            argument was supplied.
    """
    real = result["Real"]
    predicted = result["Predictions"]

    # NOTE(review): mean_squared_error needs numeric labels - presumably the
    # genre labels are integer-encoded upstream; confirm against the dataset.
    rmse = np.sqrt(mean_squared_error(real, predicted))
    f1 = f1_score(real, predicted, average='macro')
    accuracy = accuracy_score(real, predicted)

    if build_number is None:
        build_number = sys.argv[1]

    # Build the row once; it is either appended to the existing history or
    # becomes the whole history on the first run.
    new_row = pd.DataFrame({
        'Build number': [build_number],
        'RMSE': [rmse],
        'F1 Score': [f1],
        'Accuracy': [accuracy],
    })

    if os.path.exists(filename):
        history = pd.read_csv(filename)
        metrics_df = pd.concat([history, new_row], ignore_index=True)
    else:
        metrics_df = new_row

    metrics_df.to_csv(filename, index=False)
|
|
|
|
|
|
|
|
def create_plots():
    """Plot each metric from metrics_df.csv against the build number.

    Reads "metrics_df.csv" and writes one image per metric:
    "Accuracy_plot.png", "F1_score_plot.png" and "RMSE_plot.png".

    Each metric is drawn on its own figure so that lines from a previous
    metric do not leak into the next image, and every figure is saved
    *before* ``plt.show()`` is called, because interactive backends discard
    the figure once its window is closed (saving afterwards would write an
    empty image).
    """
    metrics_df = pd.read_csv("metrics_df.csv")
    builds = metrics_df["Build number"]
    # One tick per build between the first and the last recorded build;
    # range() requires the build numbers to be integers.
    ticks = range(min(builds), max(builds) + 1)

    # (column in the CSV, output image) for every plotted metric.
    plots = (
        ("Accuracy", "Accuracy_plot.png"),
        ("F1 Score", "F1_score_plot.png"),
        ("RMSE", "RMSE_plot.png"),
    )

    for metric, output_file in plots:
        fig, ax = plt.subplots()
        ax.plot(builds, metrics_df[metric])
        ax.set_xlabel("Build Number")
        ax.set_ylabel(metric)
        ax.set_title(f"{metric} of the model over time")
        ax.set_xticks(ticks)
        fig.savefig(output_file)
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
|
2024-04-14 21:59:34 +02:00
|
|
|
|
|
|
|
# Summarise NumPy arrays with more than 20 elements when they are printed.
np.set_printoptions(threshold=20)

# Load the trained model.
# NOTE: pickle.load can execute arbitrary code stored in the file, so only
# trusted model artifacts must be loaded here.
file_path = 'model.pkl'
with open(file_path, 'rb') as file:
    model = pickle.load(file)
print("Model został wczytany z pliku:", file_path)

# Split the test set into features and the target column.
test_df = pd.read_csv("artifacts/docker_test_dataset.csv")
X_test = test_df.drop(columns='playlist_genre')
Y_test = np.ravel(test_df[['playlist_genre']])

# Standardise the numeric feature columns before prediction.
# NOTE(review): the scaler is fitted on the test data itself rather than
# reusing the statistics from training - confirm this matches how the model
# was trained.
numeric_columns = X_test.select_dtypes(include=['int', 'float']).columns
scaler = StandardScaler()
X_test_scaled = scaler.fit_transform(X_test[numeric_columns])

Y_pred = model.predict(X_test_scaled)

# Persist predictions next to the ground truth, then update the metrics
# history and regenerate the plots.
result = pd.DataFrame({'Predictions': Y_pred, "Real": Y_test})
result.to_csv("spotify_genre_predictions.csv", index=False)

calculate_metrics(result)
create_plots()
|