diff --git a/mlflow/MLproject b/mlflow/MLproject
new file mode 100644
index 0000000..8656fef
--- /dev/null
+++ b/mlflow/MLproject
@@ -0,0 +1,7 @@
+name: MLflow_s464937
+conda_env: conda.yaml
+entry_points:
+  optimal_parameters:
+    parameters:
+      epochs: { type: int, default: 20 }
+    command: 'python mlflow_model.py {epochs}'
\ No newline at end of file
diff --git a/mlflow/conda.yaml b/mlflow/conda.yaml
new file mode 100644
index 0000000..de4e64c
--- /dev/null
+++ b/mlflow/conda.yaml
@@ -0,0 +1,12 @@
+name: MLflow_s464937
+channels:
+  - defaults
+dependencies:
+  - python=3.10
+  - pip
+  - pip:
+    - wheel
+    - mlflow
+    - tensorflow
+    - pandas
+    - scikit-learn
\ No newline at end of file
diff --git a/mlflow/mlflow_model.py b/mlflow/mlflow_model.py
new file mode 100644
index 0000000..ec1784d
--- /dev/null
+++ b/mlflow/mlflow_model.py
@@ -0,0 +1,73 @@
+import sys
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense
+import tensorflow as tf
+import mlflow
+
+
+# NOTE(review): the tracking URI is configured here, but nothing in this
+# script logs parameters, metrics or the model to MLflow.
+mlflow.set_tracking_uri("http://localhost:5000")
+
+# Mirrors the `epochs` default declared in the MLproject entry point.
+DEFAULT_EPOCHS = 20
+
+
+def main():
+    """Train a dense regression network for TotalKg and save it to disk.
+
+    The epoch count is taken from the first command-line argument and falls
+    back to DEFAULT_EPOCHS when the script is started without one.
+    """
+    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else DEFAULT_EPOCHS
+
+    data = pd.read_csv('./data/train.csv')
+
+    data = data[['Sex', 'Age', 'BodyweightKg', 'TotalKg']].copy()
+    for column in ('Age', 'BodyweightKg', 'TotalKg'):
+        data[column] = pd.to_numeric(data[column], errors='coerce')
+    # Drop incomplete rows only after coercion: errors='coerce' turns
+    # unparsable values into NaN, which would otherwise reach training.
+    data = data.dropna()
+    features = data[['Sex', 'Age', 'BodyweightKg']]
+    target = data['TotalKg']
+
+    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
+
+    preprocessor = ColumnTransformer(
+        transformers=[
+            ('num', StandardScaler(), ['Age', 'BodyweightKg']),
+            ('cat', OneHotEncoder(), ['Sex'])
+        ],
+    )
+
+    # input_dim=5 is 2 scaled numeric columns plus the one-hot columns for
+    # Sex, so it assumes exactly three Sex categories -- TODO confirm.
+    pipeline = Pipeline(steps=[
+        ('preprocessor', preprocessor),
+        ('model', Sequential([
+            Dense(64, activation='relu', input_dim=5),
+            Dense(64, activation='relu'),
+            Dense(1)
+        ]))
+    ])
+
+    pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
+
+    # NOTE(review): the first training row is skipped; the reason is not
+    # visible in this file -- confirm it is intentional.
+    X_train_excluded = X_train.iloc[1:]
+    y_train_excluded = y_train.iloc[1:]
+
+    pipeline.fit(X_train_excluded, y_train_excluded, model__epochs=epochs, model__validation_split=0.1)
+
+    pipeline['model'].save('powerlifting_model.h5')
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/mlruns/0/meta.yaml b/mlruns/0/meta.yaml
new file mode 100644
index 0000000..e9635fe
--- /dev/null
+++ b/mlruns/0/meta.yaml
@@ -0,0 +1,6 @@
+artifact_location: mlflow-artifacts:/0
+creation_time: 1716052187853
+experiment_id: '0'
+last_update_time: 1716052187853
+lifecycle_stage: active
+name: Default