fix

2024-05-14 22:32:48 +02:00 · 2024-05-14 22:32:48 +02:00 · 8942ab2122
commit 8942ab2122
parent 7e735543b9
1 changed files with 6 additions and 55 deletions
--- a/model.py
+++ b/model.py
@@ -1,44 +1,3 @@
-# import sys
-# import pandas as pd
-# from sklearn.model_selection import train_test_split
-# from sklearn.preprocessing import StandardScaler, OneHotEncoder
-# from sklearn.compose import ColumnTransformer
-# from sklearn.pipeline import Pipeline
-# from tensorflow.keras.models import Sequential
-# from tensorflow.keras.layers import Dense
-# import tensorflow as tf
-#
-# data = pd.read_csv('./data/train.csv')
-#
-# data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
-#
-# features = data[['Sex', 'Age', 'BodyweightKg']]
-# target = data['TotalKg']
-#
-# X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
-#
-# preprocessor = ColumnTransformer(
-#     transformers=[
-#         ('num', StandardScaler(), ['Age', 'BodyweightKg']),
-#         ('cat', OneHotEncoder(), ['Sex'])
-#     ]
-# )
-#
-# pipeline = Pipeline(steps=[
-#     ('preprocessor', preprocessor),
-#     ('model', Sequential([
-#         Dense(64, activation='relu', input_dim=4),
-#         Dense(64, activation='relu'),
-#         Dense(1)
-#     ]))
-# ])
-#
-# pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
-#
-# pipeline.fit(X_train, y_train, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
-#
-# pipeline['model'].save('powerlifting_model.h5')
-
 import sys
 import pandas as pd
 from sklearn.model_selection import train_test_split
@@ -49,26 +8,19 @@ from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense
 import tensorflow as tf

-data = pd.read_csv('./data/train.csv')
+data = pd.read_csv('./openpowerlifting.csv')

-print(data.columns)  # Debugging: Print DataFrame columns
+data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()

-# Assuming the relevant columns are at these indexes
-features_idx = [1, 4, 7]  # Sex, Age, BodyweightKg
-target_idx = 24  # TotalKg
-
-# Dropping rows with NaN values from relevant columns
-data = data.iloc[:, [1, 4, 7, 24]].dropna()
-
-features = data.iloc[:, features_idx]
-target = data.iloc[:, target_idx]
+features = data[['Sex', 'Age', 'BodyweightKg']]
+target = data['TotalKg']

 X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

 preprocessor = ColumnTransformer(
    transformers=[
-        ('num', StandardScaler(), [1, 2]),  # Age, BodyweightKg
-        ('cat', OneHotEncoder(), [0])  # Sex
+        ('num', StandardScaler(), ['Age', 'BodyweightKg']),
+        ('cat', OneHotEncoder(), ['Sex'])
    ]
 )

@@ -86,4 +38,3 @@ pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
 pipeline.fit(X_train, y_train, model__epochs=int(sys.argv[1]), model__validation_split=0.1)

 pipeline['model'].save('powerlifting_model.h5')
-