# ium_464937/predict.py (34 lines, 1.2 KiB, Python)
# Revision timestamp: 2024-04-23 22:10:38 +02:00
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
# Revision timestamp: 2024-05-15 00:27:38 +02:00
from keras.metrics import MeanSquaredError
# Revision timestamp: 2024-04-23 22:10:38 +02:00
# Script body: load a saved Keras model, rebuild the train/test split from the
# CSV, preprocess the held-out rows and write the model's predictions to disk.

# Load the previously saved Keras model from the working directory.
loaded_model = tf.keras.models.load_model('powerlifting_model.h5')

data = pd.read_csv('./data/train.csv')
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].copy()

# Coerce the numeric columns BEFORE dropping missing rows: errors='coerce'
# turns unparseable values into NaN, and those rows must be removed as well,
# otherwise NaNs flow through the scaler into the model input.
numeric_columns = ['Age', 'BodyweightKg', 'TotalKg']
for column_name in numeric_columns:
    data[column_name] = pd.to_numeric(data[column_name], errors='coerce')
data = data.dropna()

features = data[['Sex', 'Age', 'BodyweightKg']]
target = data['TotalKg']

# Fixed seed so the same rows land in the test split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Age', 'BodyweightKg']),
        # handle_unknown='ignore': a 'Sex' value that appears only in the
        # test split is encoded as all zeros instead of raising.
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['Sex']),
    ],
    # Always return a dense array; the result is passed straight to Keras.
    sparse_threshold=0,
)

# Fit the scaling statistics and one-hot categories on the training split
# only, then apply them to the test split. Fitting on the test split would
# scale the inputs with test-set statistics and could yield a different
# number of one-hot columns than the training split produces.
# NOTE(review): assumes the model was trained on features preprocessed from
# this same training split -- confirm against the training script.
preprocessor.fit(X_train)
X_test_transformed = preprocessor.transform(X_test)

predictions = loaded_model.predict(X_test_transformed)

predictions_df = pd.DataFrame(predictions, columns=['predicted_TotalKg'])
predictions_df.to_csv('powerlifting_test_predictions.csv', index=False)