This commit is contained in:
Szymon Bartanowicz 2024-05-14 22:26:47 +02:00
parent d02162bc7a
commit 14fc6d1120

View File

@ -1,3 +1,44 @@
# import sys
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler, OneHotEncoder
# from sklearn.compose import ColumnTransformer
# from sklearn.pipeline import Pipeline
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense
# import tensorflow as tf
#
# data = pd.read_csv('./data/train.csv')
#
# data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
#
# features = data[['Sex', 'Age', 'BodyweightKg']]
# target = data['TotalKg']
#
# X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
#
# preprocessor = ColumnTransformer(
# transformers=[
# ('num', StandardScaler(), ['Age', 'BodyweightKg']),
# ('cat', OneHotEncoder(), ['Sex'])
# ]
# )
#
# pipeline = Pipeline(steps=[
# ('preprocessor', preprocessor),
# ('model', Sequential([
# Dense(64, activation='relu', input_dim=4),
# Dense(64, activation='relu'),
# Dense(1)
# ]))
# ])
#
# pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
#
# pipeline.fit(X_train, y_train, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
#
# pipeline['model'].save('powerlifting_model.h5')
import sys import sys
import pandas as pd import pandas as pd
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
@ -10,17 +51,20 @@ import tensorflow as tf
# Load the raw training table.
data = pd.read_csv('./data/train.csv')

# Positional column indices into the RAW csv (labels as given by the
# original author's comments; verify against the actual file header).
features_idx = [1, 4, 7]  # Sex, Age, BodyweightKg
target_idx = 25  # TotalKg

# Keep only the columns the model uses and drop rows with any missing value.
# Building the selection from the constants keeps a single source of truth.
data = data.iloc[:, features_idx + [target_idx]].dropna()

# NOTE: after the selection above `data` has only len(features_idx) + 1
# columns, so the raw-file indices (1, 4, 7, 25) are no longer valid here and
# would raise IndexError.  Index the narrowed frame by its own positions:
# the feature columns come first, the target column is last.
n_features = len(features_idx)
features = data.iloc[:, :n_features]
target = data.iloc[:, n_features]

# Hold out 20% of the rows for testing; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Column positions below refer to `features` (0 = Sex, 1 = Age,
# 2 = BodyweightKg), not to the raw csv.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), [1, 2]),  # Age, BodyweightKg
        ('cat', OneHotEncoder(), [0]),  # Sex
    ]
)
@ -38,3 +82,4 @@ pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
# Fit the whole pipeline; the `model__` prefix routes these keyword arguments
# to the step named 'model'.  The epoch count is read from the first
# command-line argument.
pipeline.fit(
    X_train,
    y_train,
    model__epochs=int(sys.argv[1]),
    model__validation_split=0.1,
)

# Write the fitted 'model' step to disk.
pipeline['model'].save('powerlifting_model.h5')