36 lines
1.4 KiB
Python
36 lines
1.4 KiB
Python
|
import mlflow.sklearn
|
||
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
from sklearn.preprocessing import StandardScaler
|
||
|
from sklearn.metrics import accuracy_score
|
||
|
|
||
|
def load_model_and_predict():
    """Evaluate the stored model on the test CSV and log the outcome to MLflow.

    Inside a single MLflow run this function:

    1. loads the scikit-learn model saved under ``"model"``;
    2. reads ``docker_test_dataset.csv`` and separates the
       ``playlist_genre`` target column from the features;
    3. standardises the numeric feature columns and predicts on them;
    4. writes the first 20 real / predicted genre names and the accuracy
       to ``model_predictions.txt``;
    5. logs the accuracy as the ``accuracy`` metric and the report file as
       a run artifact.

    Raises:
        KeyError: if a true or predicted label is not one of the integer
            class ids ``0``-``5`` present in the label mapping.
    """
    # Integer class id -> genre name, used only for the human-readable report.
    labels_dict = {0: 'edm', 1: 'latin', 2: 'pop', 3: 'r&b', 4: 'rap', 5: 'rock'}

    # The context manager ends the run when the block exits, so no explicit
    # mlflow.end_run() call is needed.
    with mlflow.start_run():
        model = mlflow.sklearn.load_model("model")

        test_df = pd.read_csv("docker_test_dataset.csv")
        # Flatten the single-column frame into a 1-D array of labels.
        Y_test = np.ravel(test_df[['playlist_genre']])
        X_test = test_df.drop(columns='playlist_genre')

        # NOTE(review): the scaler is fitted on the test data itself. If the
        # model was trained on features scaled with training-set statistics,
        # that fitted scaler should be reused here instead - confirm.
        scaler = StandardScaler()
        numeric_columns = X_test.select_dtypes(include=['int', 'float']).columns
        X_test_scaled = scaler.fit_transform(X_test[numeric_columns])
        Y_pred = model.predict(X_test_scaled)

        accuracy = accuracy_score(Y_test, Y_pred)
        Y_test_labels = [labels_dict[number] for number in Y_test]
        Y_pred_labels = [labels_dict[number] for number in Y_pred]

        with open('model_predictions.txt', 'w', encoding='utf-8') as f:
            f.write("Real:" + str(Y_test_labels[:20]) + " \nPredicted: " + str(Y_pred_labels[:20]))
            f.write("\nAccuracy:" + str(accuracy))

        # Log only after the report file has been closed, so the uploaded
        # artifact is guaranteed to contain the complete, flushed content.
        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_artifact("model_predictions.txt")
|
||
|
if __name__ == "__main__":
    # Script entry point: direct MLflow at the tracking server on
    # localhost:5000, then run the evaluation.
    tracking_uri = "http://localhost:5000"
    mlflow.set_tracking_uri(tracking_uri)
    load_model_and_predict()
|