diff --git a/model.py b/model.py
index 1cd25a8..5f998d1 100644
--- a/model.py
+++ b/model.py
@@ -1,44 +1,3 @@
-# import sys
-# import pandas as pd
-# from sklearn.model_selection import train_test_split
-# from sklearn.preprocessing import StandardScaler, OneHotEncoder
-# from sklearn.compose import ColumnTransformer
-# from sklearn.pipeline import Pipeline
-# from tensorflow.keras.models import Sequential
-# from tensorflow.keras.layers import Dense
-# import tensorflow as tf
-#
-# data = pd.read_csv('./data/train.csv')
-#
-# data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
-#
-# features = data[['Sex', 'Age', 'BodyweightKg']]
-# target = data['TotalKg']
-#
-# X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
-#
-# preprocessor = ColumnTransformer(
-#     transformers=[
-#         ('num', StandardScaler(), ['Age', 'BodyweightKg']),
-#         ('cat', OneHotEncoder(), ['Sex'])
-#     ]
-# )
-#
-# pipeline = Pipeline(steps=[
-#     ('preprocessor', preprocessor),
-#     ('model', Sequential([
-#         Dense(64, activation='relu', input_dim=4),
-#         Dense(64, activation='relu'),
-#         Dense(1)
-#     ]))
-# ])
-#
-# pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
-#
-# pipeline.fit(X_train, y_train, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
-#
-# pipeline['model'].save('powerlifting_model.h5')
-
 import sys
 import pandas as pd
 from sklearn.model_selection import train_test_split
@@ -49,26 +8,19 @@ from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense
 import tensorflow as tf
 
-data = pd.read_csv('./data/train.csv')
+data = pd.read_csv('./openpowerlifting.csv')
 
-print(data.columns) # Debugging: Print DataFrame columns
+data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].dropna()
 
-# Assuming the relevant columns are at these indexes
-features_idx = [1, 4, 7] # Sex, Age, BodyweightKg
-target_idx = 24 # TotalKg
-
-# Dropping rows with NaN values from relevant columns
-data = data.iloc[:, [1, 4, 7, 24]].dropna()
-
-features = data.iloc[:, features_idx]
-target = data.iloc[:, target_idx]
+features = data[['Sex', 'Age', 'BodyweightKg']]
+target = data['TotalKg']
 
 X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
 
 preprocessor = ColumnTransformer(
     transformers=[
-        ('num', StandardScaler(), [1, 2]), # Age, BodyweightKg
-        ('cat', OneHotEncoder(), [0]) # Sex
+        ('num', StandardScaler(), ['Age', 'BodyweightKg']),
+        ('cat', OneHotEncoder(), ['Sex'])
     ]
 )
 
@@ -86,4 +38,3 @@ pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
 pipeline.fit(X_train, y_train, model__epochs=int(sys.argv[1]), model__validation_split=0.1)
 
 pipeline['model'].save('powerlifting_model.h5')
-