fix

2024-05-14 22:26:47 +02:00 · 2024-05-14 22:26:47 +02:00 · 14fc6d1120
commit 14fc6d1120
parent d02162bc7a
1 changed files with 50 additions and 5 deletions
--- a/model.py
+++ b/model.py
@@ -1,3 +1,44 @@
 # import sys
 # import pandas as pd
 # from sklearn.model_selection import train_test_split
 # from sklearn.preprocessing import StandardScaler, OneHotEncoder
 # from sklearn.compose import ColumnTransformer
 # from sklearn.pipeline import Pipeline
 # from tensorflow.keras.models import Sequential
 # from tensorflow.keras.layers import Dense
 # import tensorflow as tf
 #
 # data = pd.read_csv('./data/train.csv')
 #
 # data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
 #
 # features = data[['Sex', 'Age', 'BodyweightKg']]
 # target = data['TotalKg']
 #
 # X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
 #
 # preprocessor = ColumnTransformer(
 #     transformers=[
 #         ('num', StandardScaler(), ['Age', 'BodyweightKg']),
 #         ('cat', OneHotEncoder(), ['Sex'])
 #     ]
 # )
 #
 # pipeline = Pipeline(steps=[
 #     ('preprocessor', preprocessor),
 #     ('model', Sequential([
 #         Dense(64, activation='relu', input_dim=4),
 #         Dense(64, activation='relu'),
 #         Dense(1)
 #     ]))
 # ])
 #
 # pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
 #
 # pipeline.fit(X_train, y_train, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
 #
 # pipeline['model'].save('powerlifting_model.h5')
 import sys
 import pandas as pd
 from sklearn.model_selection import train_test_split
@@ -10,17 +51,20 @@ import tensorflow as tf
 data = pd.read_csv('./data/train.csv')
-data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
+features_idx = [1, 4, 7]  # Sex, Age, BodyweightKg
 target_idx = 25  # TotalKg
-features = data[['Sex', 'Age', 'BodyweightKg']]
+data = data.iloc[:, [1, 4, 7, 25]].dropna()
-target = data['TotalKg']
+
 features = data.iloc[:, features_idx]
 target = data.iloc[:, target_idx]
 X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
 preprocessor = ColumnTransformer(
    transformers=[
-        ('num', StandardScaler(), ['Age', 'BodyweightKg']),
+        ('num', StandardScaler(), [1, 2]),  # Age, BodyweightKg
-        ('cat', OneHotEncoder(), ['Sex'])
+        ('cat', OneHotEncoder(), [0])  # Sex
    ]
 )
@@ -38,3 +82,4 @@ pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
 pipeline.fit(X_train, y_train, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
 pipeline['model'].save('powerlifting_model.h5')