"""Train a small dense regression network on ./data/train.csv.

Usage: pass the number of training epochs as the first command-line argument.

Three feature columns and one target column are picked from the CSV by
position, rows with missing values in those columns are dropped, the
features are scaled / one-hot encoded, and the fitted Keras model is saved
to 'powerlifting_model.h5'.
"""
import sys

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow as tf

# Positions of the relevant columns in the raw CSV.
# NOTE(review): these are assumed to be Sex, Age, BodyweightKg (features) and
# TotalKg (target); confirm against the printed column list below.
FEATURE_COLUMNS = [1, 4, 7]
TARGET_COLUMN = 24

# Parse the epoch count first so a missing or invalid argument fails before
# any data is loaded.
epochs = int(sys.argv[1])

data = pd.read_csv('./data/train.csv')
print(data.columns)  # Debugging: print DataFrame columns

# Keep only the relevant columns and drop rows with NaN in any of them.
data = data.iloc[:, FEATURE_COLUMNS + [TARGET_COLUMN]].dropna()

# After the selection above the frame has exactly four columns, so the raw
# CSV positions are no longer valid here: the features are the first three
# columns and the target is the last one.
features = data.iloc[:, :-1]
target = data.iloc[:, -1]

# The held-out split is created but not evaluated in this script.
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Column positions below are relative to `features`:
# 0 is the categorical column, 1 and 2 are the numeric columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), [1, 2]),
        ('cat', OneHotEncoder(), [0]),
    ],
    # Always return a dense array; the network is fed the matrix directly.
    sparse_threshold=0,
)

# Fit the preprocessor before building the model: the width of the encoded
# matrix depends on how many categories the one-hot encoder finds.
X_train_prepared = preprocessor.fit_transform(X_train)

model = Sequential([
    Dense(64, activation='relu', input_dim=X_train_prepared.shape[1]),
    Dense(64, activation='relu'),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

model.fit(X_train_prepared, y_train, epochs=epochs, validation_split=0.1)

model.save('powerlifting_model.h5')