ium_464937/model.py

44 lines
1.3 KiB
Python
Raw Normal View History

2024-05-14 19:21:17 +02:00
import sys
2024-04-23 22:10:38 +02:00
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf
2024-05-14 23:30:16 +02:00
# Load the training table and keep only the columns used by the model.
data = pd.read_csv('./data/train.csv')
data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].copy()

# Coerce Age to a numeric dtype *before* dropping missing rows: with
# errors='coerce' every unparseable value becomes NaN, and those rows must be
# removed together with the rows that were already incomplete. Doing the
# dropna first would let freshly created NaN ages through to training.
data['Age'] = pd.to_numeric(data['Age'], errors='coerce')
data = data.dropna()

# Inputs are Sex, Age and BodyweightKg; the regression target is TotalKg.
features = data[['Sex', 'Age', 'BodyweightKg']]
target = data['TotalKg']

# Reproducible 80/20 split (fixed random_state). The held-out part
# (X_test / y_test) is not used further in the visible part of this script.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)
# Column-wise preprocessing: standardise the two numeric columns and one-hot
# encode the categorical Sex column.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), ['Age', 'BodyweightKg']),
        ('cat', OneHotEncoder(), ['Sex']),
    ],
)

# Small feed-forward regressor: two hidden ReLU layers of 64 units and a
# single linear output unit.
# NOTE(review): input_dim=4 presumes the preprocessor emits 4 columns, i.e.
# 2 scaled numeric columns + 2 one-hot columns for Sex -- confirm that Sex has
# exactly two categories in the training data.
_network = Sequential()
_network.add(Dense(64, activation='relu', input_dim=4))
_network.add(Dense(64, activation='relu'))
_network.add(Dense(1))

# Trained with Adam on mean squared error; mean absolute error is tracked as
# an additional metric.
_network.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Chain preprocessing and the network so that a single fit() call handles both.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', _network),
])
2024-05-14 23:12:11 +02:00
# The number of training epochs is taken from the first command-line argument.
# Validate it up front so that a missing or malformed value produces a clear
# message instead of a bare IndexError / ValueError traceback.
if len(sys.argv) < 2:
    raise SystemExit(f"usage: {sys.argv[0]} <epochs>")
try:
    epochs = int(sys.argv[1])
except ValueError:
    raise SystemExit(
        f"epochs must be an integer, got {sys.argv[1]!r}"
    ) from None

# Train on everything except the first row of the training split.
# NOTE(review): the reason for skipping row 0 is not evident from this script.
X_train_excluded = X_train.iloc[1:]
y_train_excluded = y_train.iloc[1:]

# The `model__` prefix routes these keyword arguments to the fit() call of the
# pipeline's final 'model' step.
pipeline.fit(
    X_train_excluded,
    y_train_excluded,
    model__epochs=epochs,
    model__validation_split=0.1,
)

# Only the network is written to disk here; the fitted preprocessor is not
# part of the saved file.
pipeline['model'].save('powerlifting_model.h5')