mlflow

2024-05-18 19:11:00 +02:00 · 2024-05-18 19:11:00 +02:00 · 57def16f1a
commit 57def16f1a
parent 32a592f78d
4 changed files with 80 additions and 0 deletions
--- a/mlflow/MLproject
+++ b/mlflow/MLproject
@ -0,0 +1,7 @@
 # MLflow project definition; the runtime environment is described in conda.yaml.
 name: MLflow_s464937
 conda_env: conda.yaml
 entry_points:
  # Single entry point: runs the training script with a configurable epoch count.
  optimal_parameters:
    parameters:
      # Integer parameter; 20 is used when the caller does not supply a value.
      epochs: { type: int, default: 20 }
    # {epochs} is substituted into the command line as the script's first
    # positional argument.
    command: 'python mlflow_model.py {epochs}'
--- a/mlflow/conda.yaml
+++ b/mlflow/conda.yaml
@ -0,0 +1,12 @@
 # Conda environment referenced by the MLproject file (conda_env: conda.yaml).
 name: MLflow_s464937
 channels:
  - defaults
 dependencies:
  - python=3.10
  - pip
  # Everything below is installed through pip rather than conda.
  # NOTE(review): none of these packages is version-pinned, so two builds of
  # this environment may resolve to different releases -- consider pinning.
  - pip:
      - wheel
      - mlflow
      - tensorflow
      - pandas
      - scikit-learn
--- a/mlflow/mlflow_model.py
+++ b/mlflow/mlflow_model.py
@ -0,0 +1,55 @@
 import sys
 import pandas as pd
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler, OneHotEncoder
 from sklearn.compose import ColumnTransformer
 from sklearn.pipeline import Pipeline
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense
 import tensorflow as tf
 import mlflow
 # Point the MLflow client at a tracking server on localhost:5000. This runs at
 # import time, before main() is called.
 # NOTE(review): nothing else in this script calls the mlflow API (no run is
 # started, no params/metrics are logged) -- confirm whether logging is missing.
 mlflow.set_tracking_uri("http://localhost:5000")
 def main():
    """Train a small dense regressor for ``TotalKg`` and save it to disk.

    Reads ``./data/train.csv``, keeps the ``Sex``, ``Age``, ``BodyweightKg``
    and ``TotalKg`` columns, standardises the numeric features, one-hot
    encodes ``Sex`` and fits a Keras ``Sequential`` network through an
    sklearn ``Pipeline``. The trained network is written to
    ``powerlifting_model.h5`` in the current working directory.

    The number of training epochs is read from the first command-line
    argument (``sys.argv[1]``). When no argument is given, 20 is used, which
    matches the default declared for ``epochs`` in the MLproject file.

    Raises:
        ValueError: if the epochs argument cannot be parsed as an integer.
    """
    default_epochs = 20
    # Fall back to the default instead of crashing with IndexError when the
    # script is started without arguments.
    epochs = int(sys.argv[1]) if len(sys.argv) > 1 else default_epochs

    numeric_columns = ['Age', 'BodyweightKg', 'TotalKg']
    data = pd.read_csv('./data/train.csv')
    data = data[['Sex'] + numeric_columns].copy()
    # Coerce first, drop afterwards: errors='coerce' turns unparsable values
    # into NaN, so dropna() has to run after the conversion or those rows
    # would leak NaN into training.
    for column in numeric_columns:
        data[column] = pd.to_numeric(data[column], errors='coerce')
    data = data.dropna()

    features = data[['Sex', 'Age', 'BodyweightKg']]
    target = data['TotalKg']
    # The held-out 20% split is not used by this script; only the training
    # part is consumed below.
    X_train, _, y_train, _ = train_test_split(features, target, test_size=0.2, random_state=42)

    # NOTE(review): the first training row is skipped, as in the original
    # code -- the reason is not visible here; confirm it is intentional.
    X_train_excluded = X_train.iloc[1:]
    y_train_excluded = y_train.iloc[1:]

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), ['Age', 'BodyweightKg']),
            ('cat', OneHotEncoder(), ['Sex'])
        ],
        # Always hand a dense array to Keras, whatever the one-hot density is.
        sparse_threshold=0,
    )
    # Two scaled numeric columns plus one indicator column per Sex category
    # present in the data the encoder is fitted on (5 for three categories).
    n_inputs = 2 + X_train_excluded['Sex'].nunique()
    pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('model', Sequential([
            Dense(64, activation='relu', input_dim=n_inputs),
            Dense(64, activation='relu'),
            Dense(1)
        ]))
    ])
    pipeline['model'].compile(optimizer='adam', loss='mse', metrics=['mae'])
    # model__* keyword arguments are forwarded to the Keras model's fit().
    pipeline.fit(X_train_excluded, y_train_excluded, model__epochs=epochs, model__validation_split=0.1)
    pipeline['model'].save('powerlifting_model.h5')
 # Run training only when executed as a script (e.g. through the MLproject
 # command ``python mlflow_model.py {epochs}``), not when imported.
 if __name__ == '__main__':
    main()
--- a/mlruns/0/meta.yaml
+++ b/mlruns/0/meta.yaml
@ -0,0 +1,6 @@
 artifact_location: mlflow-artifacts:/0
 creation_time: 1716052187853
 experiment_id: '0'
 last_update_time: 1716052187853
 lifecycle_stage: active
 name: Default