ium_495719/evaluate.py

62 lines
1.7 KiB
Python
Raw Normal View History

2024-05-14 16:21:16 +02:00
import pandas as pd
import numpy as np
import sys
import os
2024-05-16 03:01:35 +02:00
import mlflow
2024-05-14 16:21:16 +02:00
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from keras.models import load_model
from helper import prepare_tensors
2024-05-15 00:22:12 +02:00
import matplotlib.pyplot as plt
2024-05-14 16:21:16 +02:00
2024-05-16 03:01:35 +02:00
def _parse_build_number(argv):
    """Return the build number given as the first command-line argument.

    Args:
        argv: Argument list shaped like ``sys.argv`` (program name first).

    Returns:
        ``int(argv[1])`` when that argument is present and parses as an
        integer; otherwise 0.
    """
    if len(argv) < 2:
        return 0
    try:
        return int(argv[1])
    except ValueError:
        # An empty or non-numeric argument should not abort the evaluation:
        # warn and use the same default as when the argument is missing.
        print(
            f"Invalid build number {argv[1]!r}; falling back to 0.",
            file=sys.stderr,
        )
        return 0


# Build number used to label this run's row in the metrics history.
build_number = _parse_build_number(sys.argv)
2024-05-14 16:21:16 +02:00
# Read the test CSV and let the project helper split it into the model
# inputs (X_test) and the targets (Y_test).
test_frame = pd.read_csv('hp_test.csv')
X_test, Y_test = prepare_tensors(test_frame)

# Restore the saved model and run it on the test inputs.
model = load_model('hp_model.h5')
test_predictions = model.predict(X_test)

# Write the predictions to disk as a single "Predicted_Price" column.
pd.DataFrame(
    test_predictions,
    columns=["Predicted_Price"],
).to_csv('hp_test_predictions.csv', index=False)
# Error metrics comparing the test targets with the model's predictions.
mse = mean_squared_error(Y_test, test_predictions)
rmse = np.sqrt(mse)
mae = mean_absolute_error(Y_test, test_predictions)
r2 = r2_score(Y_test, test_predictions)

# One-row frame describing this run, keyed by the build number.
metrics_df = pd.DataFrame({
    column: [value]
    for column, value in (
        ('Build_Number', build_number),
        ('RMSE', rmse),
        ('MAE', mae),
        ('R2', r2),
    )
})
metrics_file = 'hp_test_metrics.csv'

# Append this run's row to the stored history when the file already exists;
# otherwise the history consists of this run alone.
if not os.path.isfile(metrics_file):
    updated_metrics_df = metrics_df
else:
    history = pd.read_csv(metrics_file)
    updated_metrics_df = pd.concat([history, metrics_df], ignore_index=True)

# Rewrite the history file so it includes the current run.
updated_metrics_df.to_csv(metrics_file, index=False)
2024-05-15 01:39:13 +02:00
# Draw one line chart per metric (metric value against build number) and
# save each chart to its own PNG file.
metrics = ['RMSE', 'MAE', 'R2']
for metric_name in metrics:
    builds = updated_metrics_df['Build_Number']
    values = updated_metrics_df[metric_name]

    plt.plot(builds, values, marker='o')
    plt.title(f'{metric_name} vs Builds')
    plt.xlabel('Build Number')
    plt.ylabel(metric_name)
    plt.grid(True)

    plt.savefig(f'plot_{metric_name.lower()}.png')
    # Close the current figure so the next metric starts on a fresh one.
    plt.close()
2024-05-16 03:01:35 +02:00
# Record this run's three metrics in a new MLflow run.
with mlflow.start_run() as run:
    for metric_name, metric_value in (('RMSE', rmse), ('MAE', mae), ('R2', r2)):
        mlflow.log_metric(metric_name, metric_value)