2024-05-14 19:21:17 +02:00
|
|
|
import sys
|
2024-04-23 22:10:38 +02:00
|
|
|
import pandas as pd
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
|
|
|
from sklearn.compose import ColumnTransformer
|
|
|
|
from sklearn.pipeline import Pipeline
|
|
|
|
from tensorflow.keras.models import Sequential
|
|
|
|
from tensorflow.keras.layers import Dense
|
|
|
|
import tensorflow as tf
|
|
|
|
|
2024-05-14 22:11:46 +02:00
|
|
|
# Train a small feed-forward regressor that predicts a lifter's total
# ('TotalKg') from 'Sex', 'Age' and 'BodyweightKg', then save the trained
# Keras model to 'powerlifting_model.h5'.
#
# Usage: python <this script> <epochs>

# Validate the command line before any expensive work, so a missing or
# non-integer epoch count fails immediately rather than after the data has
# been loaded and the network built.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <epochs>")
epochs = int(sys.argv[1])

data = pd.read_csv('./data/train.csv')

# Keep only the columns the model uses and drop rows missing any of them.
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()

numeric_columns = ['Age', 'BodyweightKg']
categorical_columns = ['Sex']

features = data[categorical_columns + numeric_columns]
target = data['TotalKg']

# X_test / y_test are split off here but not used further in this script.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_columns),
        ('cat', OneHotEncoder(), categorical_columns),
    ],
    # OneHotEncoder emits a sparse matrix; force the stacked result to be a
    # dense array so the Keras model always receives a plain ndarray.
    sparse_threshold=0,
)

# The network's input width must equal the preprocessor's output width:
# one column per scaled numeric feature plus one per category that the
# one-hot encoder will learn from the training split. Deriving it from the
# data (instead of hard-coding 4) keeps the model valid when 'Sex' has more
# than two distinct values.
n_inputs = len(numeric_columns) + int(X_train[categorical_columns].nunique().sum())

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', Sequential([
        Dense(64, activation='relu', input_dim=n_inputs),
        Dense(64, activation='relu'),
        Dense(1),  # single linear output: the predicted TotalKg
    ])),
])

# The Keras model has to be compiled before Pipeline.fit reaches it.
pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])

# 'model__*' keyword arguments are routed by the Pipeline to the final
# step's fit(), i.e. to Sequential.fit(epochs=..., validation_split=...).
pipeline.fit(
    X_train,
    y_train,
    model__epochs=epochs,
    model__validation_split=0.1,
)

# Only the Keras model is saved; the fitted preprocessor is not persisted.
pipeline['model'].save('powerlifting_model.h5')
|