From 4be943832bd9670dfbcd2b4708d9060cd88a7141 Mon Sep 17 00:00:00 2001 From: PawelDopierala Date: Thu, 16 May 2024 03:01:35 +0200 Subject: [PATCH] Add mlflow --- Dockerfile | 2 +- JenkinsfileStats | 2 +- JenkinsfileTraining | 12 +++++++++++- MLProject | 15 +++++++++++++++ Readme.md | 1 + create_model.py | 12 ++++++++++-- evaluate.py | 12 +++++++++++- 7 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 MLProject create mode 100644 Readme.md diff --git a/Dockerfile b/Dockerfile index 47596ba..d63e477 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ FROM ubuntu:latest RUN apt-get update && \ apt-get install -y python3-pip && \ - pip3 install kaggle pandas scikit-learn tensorflow matplotlib + pip3 install kaggle pandas scikit-learn tensorflow matplotlib mlflow RUN useradd -ms /bin/bash jenkins diff --git a/JenkinsfileStats b/JenkinsfileStats index 2d49034..b85760d 100644 --- a/JenkinsfileStats +++ b/JenkinsfileStats @@ -1,6 +1,6 @@ pipeline { agent { - docker { image 'paweldopierala/ium:1.0.0' } + docker { image 'paweldopierala/ium:2.0.0' } } parameters{ diff --git a/JenkinsfileTraining b/JenkinsfileTraining index 5657ba0..826e6fb 100644 --- a/JenkinsfileTraining +++ b/JenkinsfileTraining @@ -14,6 +14,16 @@ pipeline { description: 'Epochs', name: 'EPOCHS' ) + string( + defaultValue: '0.001', + description: 'Learning Rate', + name: 'LEARNING_RATE' + ) + string( + defaultValue: '32', + description: 'Batch size', + name: 'BATCH_SIZE' + ) } triggers { @@ -37,7 +47,7 @@ pipeline { stage('Script') { steps { sh 'chmod 777 ./create_model.py' - sh "python3 ./create_model.py ${params.EPOCHS}" + sh "python3 ./create_model.py ${params.EPOCHS} ${params.LEARNING_RATE} ${params.BATCH_SIZE}" } } stage('CreateArtifacts') { diff --git a/MLProject b/MLProject new file mode 100644 index 0000000..322d635 --- /dev/null +++ b/MLProject @@ -0,0 +1,15 @@ +name: HousePriceModel + +docker_env: + image: paweldopierala/ium:2.0.0 + +entry_points: + main: + parameters: + epochs: {type: int, default: 20} + learning_rate: {type: float, default: 0.001} + batch_size: {type: int, default: 20} + command: "python train.py {epochs} {learning_rate} {batch_size}" + + test: + command: "python test.py" \ No newline at end of file diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..a19b3b0 --- /dev/null +++ b/Readme.md @@ -0,0 +1 @@ +```python -m mlflow run .``` \ No newline at end of file diff --git a/create_model.py b/create_model.py index 97b27d3..18d3fbe 100644 --- a/create_model.py +++ b/create_model.py @@ -4,10 +4,13 @@ from keras.models import Sequential from keras.layers import Dense from keras.optimizers import Adam from keras import regularizers +import mlflow from helper import prepare_tensors epochs = int(sys.argv[1]) +learning_rate = float(sys.argv[2]) +batch_size = int(sys.argv[3]) hp_train = pd.read_csv('hp_train.csv') hp_dev = pd.read_csv('hp_dev.csv') @@ -22,9 +25,14 @@ model.add(Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)) model.add(Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01))) model.add(Dense(1, activation='linear')) -adam = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7) +adam = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-7) model.compile(optimizer=adam, loss='mean_squared_error') -model.fit(X_train, Y_train, epochs=epochs, batch_size=32, validation_data=(X_dev, Y_dev)) +model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_dev, Y_dev)) model.save('hp_model.h5') + +with mlflow.start_run() as run: + mlflow.log_param("epochs", epochs) + mlflow.log_param("learning_rate", learning_rate) + mlflow.log_param("batch_size", batch_size) diff --git a/evaluate.py b/evaluate.py index 81b3ba9..4dc4b11 100644 --- a/evaluate.py +++ b/evaluate.py @@ -2,12 +2,17 @@ import pandas as pd import numpy as np import sys import os + +import mlflow from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score from keras.models import load_model from helper import prepare_tensors import matplotlib.pyplot as plt -build_number = int(sys.argv[1]) +if len(sys.argv) > 1: + build_number = int(sys.argv[1]) +else: + build_number = 0 hp_test = pd.read_csv('hp_test.csv') X_test, Y_test = prepare_tensors(hp_test) @@ -49,3 +54,8 @@ for metric in metrics: plot_file = f'plot_{metric.lower()}.png' plt.savefig(plot_file) plt.close() + +with mlflow.start_run() as run: + mlflow.log_metric('RMSE', rmse) + mlflow.log_metric('MAE', mae) + mlflow.log_metric('R2', r2)