"""Evaluate a pickled model on the test dataset and track metrics per build.

The script loads ``model.pkl``, predicts ``playlist_genre`` for
``artifacts/docker_test_dataset.csv``, writes the predictions to
``spotify_genre_predictions.csv``, appends RMSE / F1 / accuracy for the
current build to ``metrics_df.csv`` and regenerates one PNG plot per metric.

Usage: the build number is read from the first command-line argument.
"""
import os
import pickle
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

# CSV file that accumulates one row of metrics per build.
METRICS_FILE = 'metrics_df.csv'


def calculate_metrics(result, build_number=None):
    """Compute RMSE, macro F1 and accuracy and append them to the metrics CSV.

    Args:
        result: DataFrame with a "Real" column (ground truth) and a
            "Predictions" column (model output).
        build_number: Identifier stored in the "Build number" column.
            Defaults to ``sys.argv[1]`` when omitted.

    Raises:
        IndexError: if ``build_number`` is omitted and no command-line
            argument was supplied.

    NOTE(review): RMSE is computed directly on the label columns, which
    presumably requires numerically encoded genres -- confirm.
    """
    if build_number is None:
        build_number = sys.argv[1]

    real = result["Real"]
    predicted = result["Predictions"]
    rmse = np.sqrt(mean_squared_error(real, predicted))
    f1 = f1_score(real, predicted, average='macro')
    accuracy = accuracy_score(real, predicted)

    new_row = pd.DataFrame({
        'Build number': [build_number],
        'RMSE': [rmse],
        'F1 Score': [f1],
        'Accuracy': [accuracy],
    })

    if os.path.exists(METRICS_FILE):
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported way to add rows.
        history = pd.read_csv(METRICS_FILE)
        metrics_df = pd.concat([history, new_row], ignore_index=True)
    else:
        metrics_df = new_row

    metrics_df.to_csv(METRICS_FILE, index=False)


def _plot_metric(metrics_df, column, output_path):
    """Plot one metric column against the build number and save it as an image."""
    builds = metrics_df["Build number"]

    # A fresh figure per metric keeps curves from earlier plots out of this one.
    fig = plt.figure()
    plt.plot(builds, metrics_df[column])
    plt.xlabel("Build Number")
    plt.ylabel(column)
    plt.title(f"{column} of the model over time")
    # One tick per build number between the smallest and largest recorded build.
    plt.xticks(range(min(builds), max(builds) + 1))

    # Save before showing: once show() returns on an interactive backend the
    # figure may already be destroyed, and savefig would write a blank image.
    plt.savefig(output_path)
    plt.show()
    plt.close(fig)


def create_plots():
    """Render accuracy, F1 score and RMSE history plots from the metrics CSV.

    Reads ``metrics_df.csv`` and writes ``Accuracy_plot.png``,
    ``F1_score_plot.png`` and ``RMSE_plot.png``. Each plot is also passed to
    ``plt.show()``.
    """
    metrics_df = pd.read_csv(METRICS_FILE)
    _plot_metric(metrics_df, "Accuracy", "Accuracy_plot.png")
    _plot_metric(metrics_df, "F1 Score", "F1_score_plot.png")
    _plot_metric(metrics_df, "RMSE", "RMSE_plot.png")


np.set_printoptions(threshold=20)

file_path = 'model.pkl'
# SECURITY: pickle.load executes arbitrary code embedded in the file; only
# load model files that come from a trusted source.
with open(file_path, 'rb') as file:
    model = pickle.load(file)
print("Model zostaƂ wczytany z pliku:", file_path)

test_df = pd.read_csv("artifacts/docker_test_dataset.csv")
Y_test = test_df[['playlist_genre']]
X_test = test_df.drop(columns='playlist_genre')
Y_test = np.ravel(Y_test)

# NOTE(review): the scaler is fitted on the test set itself. If the model was
# trained on data scaled with a different (training-set) scaler, that scaler
# should be persisted and reused here instead -- confirm with the training code.
scaler = StandardScaler()
numeric_columns = X_test.select_dtypes(include=['int', 'float']).columns
X_test_scaled = scaler.fit_transform(X_test[numeric_columns])

Y_pred = model.predict(X_test_scaled)

result = pd.DataFrame({'Predictions': Y_pred, "Real": Y_test})
result.to_csv("spotify_genre_predictions.csv", index=False)

calculate_metrics(result)
create_plots()