diff --git a/metrics.py b/metrics.py index c124b1f..d88377f 100644 --- a/metrics.py +++ b/metrics.py @@ -1,24 +1,17 @@ -# import pandas as pd -# from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error -# from math import sqrt -# import sys -# -# data = pd.read_csv('powerlifting_test_predictions.csv') -# y_pred = data['Predictions'] -# y_test = data['Actual'] -# y_test_binary = (y_test >= 3).astype(int) -# +import pandas as pd +from sklearn.metrics import accuracy_score, precision_recall_fscore_support, mean_squared_error +from math import sqrt +import sys + +data = pd.read_csv('powerlifting_test_predictions.csv') +y_pred = data['predicted_TotalKg'] +y_test = data['actual_TotalKg'] +y_test_binary = (y_test >= 3).astype(int) + # build_number = sys.argv[1] -# -# accuracy = accuracy_score(y_test_binary, y_pred.round()) -# precision, recall, f1, _ = precision_recall_fscore_support(y_test_binary, y_pred.round(), average='micro') -# rmse = sqrt(mean_squared_error(y_test, y_pred)) -# -# print(f'Accuracy: {accuracy}') -# print(f'Micro-avg Precision: {precision}') -# print(f'Micro-avg Recall: {recall}') -# print(f'F1 Score: {f1}') -# print(f'RMSE: {rmse}') +build_number = 1 + +rmse = sqrt(mean_squared_error(y_test, y_pred)) with open(r"metrics.txt", "a") as f: f.write(f"{123},{1}\n") \ No newline at end of file diff --git a/metrics.txt b/metrics.txt new file mode 100644 index 0000000..e69de29 diff --git a/plot.py b/plot.py index 72eaa81..24d2201 100644 --- a/plot.py +++ b/plot.py @@ -11,8 +11,8 @@ def main(): plt.plot(build_numbers, accuracy) plt.xlabel("Build Number") - plt.ylabel("Accuracy") - plt.title("Accuracy of the model over time") + plt.ylabel("RMSE") + plt.title("RMSE of the model over time") plt.xticks(range(min(build_numbers), max(build_numbers) + 1)) plt.show() diff --git a/predict.py b/predict.py index aa6060c..1a51da3 100644 --- a/predict.py +++ b/predict.py @@ -6,15 +6,16 @@ from sklearn.pipeline import Pipeline from sklearn.model_selection import train_test_split from keras.metrics import MeanSquaredError - loaded_model = tf.keras.models.load_model('powerlifting_model.h5') -data = pd.read_csv('./data/train.csv') +data = pd.read_csv('openpowerlifting.csv') data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna() + data['Age'] = pd.to_numeric(data['Age'], errors='coerce') data['BodyweightKg'] = pd.to_numeric(data['BodyweightKg'], errors='coerce') data['TotalKg'] = pd.to_numeric(data['TotalKg'], errors='coerce') + features = data[['Sex', 'Age', 'BodyweightKg']] target = data['TotalKg'] @@ -26,8 +27,9 @@ preprocessor = ColumnTransformer( ('cat', OneHotEncoder(), ['Sex']) ] ) -X_test_transformed = preprocessor.fit_transform(X_test) +X_test_transformed = preprocessor.fit_transform(X_test) predictions = loaded_model.predict(X_test_transformed) predictions_df = pd.DataFrame(predictions, columns=['predicted_TotalKg']) +predictions_df['actual_TotalKg'] = y_test.reset_index(drop=True) predictions_df.to_csv('powerlifting_test_predictions.csv', index=False)