From d572509234c56266094db6ea1e179cdd50590072 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20Parafin=CC=81ski?= Date: Mon, 16 May 2022 01:58:32 +0200 Subject: [PATCH] add solution for lab8 --- Dockerfile | 3 +++ Jenkinsfile_predict | 29 ++++++++++++++++++++++++++ Jenkinsfile_registry | 16 +++++++++++++++ Jenkinsfile_train | 12 +++++++---- biblioteka_DL/dllib.py | 46 +++++++++++++++++++----------------------- predict.py | 16 +++++++++++++++ registry.py | 15 ++++++++++++++ 7 files changed, 108 insertions(+), 29 deletions(-) create mode 100644 Jenkinsfile_predict create mode 100644 Jenkinsfile_registry create mode 100644 predict.py create mode 100644 registry.py diff --git a/Dockerfile b/Dockerfile index 75ff4a5..2af5a66 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,6 +13,7 @@ RUN pip3 install matplotlib RUN pip3 install torch RUN pip3 install sacred RUN pip3 install pymongo +RUN pip3 install mlflow ARG CUTOFF ARG KAGGLE_USERNAME @@ -27,6 +28,8 @@ COPY lab2/download.sh . COPY biblioteka_DL/dllib.py . COPY biblioteka_DL/evaluate.py . COPY biblioteka_DL/imdb_top_1000.csv . +COPY predict.py . +COPY registry.py . 
RUN chmod +x ./download.sh RUN ./download.sh diff --git a/Jenkinsfile_predict b/Jenkinsfile_predict new file mode 100644 index 0000000..13def17 --- /dev/null +++ b/Jenkinsfile_predict @@ -0,0 +1,29 @@ +pipeline { + agent { + docker { + image 'docker_image' + } + } + parameters { + buildSelector( + defaultSelector: lastSuccessful(), + description: 'Which build to use for copying artifacts for predict', + name: 'BUILD_SELECTOR') + string( + defaultValue: '{\\"inputs\\": [900.0]}', + description: 'Input file', + name: 'INPUT', + trim: true + ) + } + + stages { + stage('Script') { + steps { + copyArtifacts projectName: 's444409-training/main', selector: buildParameter('BUILD_SELECTOR') + sh "echo ${params.INPUT} > input_example.json" + sh "python predict.py" + } + } + } +} \ No newline at end of file diff --git a/Jenkinsfile_registry b/Jenkinsfile_registry new file mode 100644 index 0000000..7373578 --- /dev/null +++ b/Jenkinsfile_registry @@ -0,0 +1,16 @@ +pipeline { + agent { + docker { + image 'docker_image' + args '-v /mlruns:/mlruns' + } + } + + stages { + stage('Script') { + steps { + sh 'python3 ./registry.py' + } + } + } +} \ No newline at end of file diff --git a/Jenkinsfile_train b/Jenkinsfile_train index 607503d..eb9bdbf 100644 --- a/Jenkinsfile_train +++ b/Jenkinsfile_train @@ -1,9 +1,10 @@ pipeline { agent { - dockerfile { - additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} --build-arg CUTOFF=${params.CUTOFF} -t docker_image" - } - } + docker { + image 'docker_image' + args '-v /mlruns:/mlruns' + } + } parameters { string( defaultValue: '1000', @@ -22,6 +23,9 @@ pipeline { steps { sh 'python3 ./biblioteka_DL/dllib.py with "epochs=$EPOCHS"' archiveArtifacts artifacts: 'model.pkl, s444018_sacred_FileObserver/**/*.*, result.csv', followSymlinks: false + archiveArtifacts artifacts: 'mlruns/**' + archiveArtifacts artifacts: 'my_model/**' + build job: 's444018-evaluation/master/' } } } diff 
--git a/biblioteka_DL/dllib.py b/biblioteka_DL/dllib.py index 48964cf..5b6ef99 100644 --- a/biblioteka_DL/dllib.py +++ b/biblioteka_DL/dllib.py @@ -1,25 +1,22 @@ import sys import torch +import mlflow import torch.nn as nn import pandas as pd import numpy as np import matplotlib.pyplot as plt +from mlflow.models import infer_signature from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, mean_squared_error from sacred.observers import MongoObserver, FileStorageObserver from sacred import Experiment +from urllib.parse import urlparse +# mlflow.set_tracking_uri("http://172.17.0.1:5000") +mlflow.set_experiment("s444018") -ex = Experiment(save_git_info=False) -ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', - db_name='sacred')) - -ex.observers.append(FileStorageObserver('s444018_sacred_FileObserver')) - -@ex.config -def my_config(): - epochs = "1000" +epochs = int(sys.argv[-1].rpartition("=")[2]) if len(sys.argv) > 1 else 20 def drop_relevant_columns(imbd_data): @@ -88,8 +85,7 @@ class LinearRegressionModel(torch.nn.Module): return y_pred -@ex.automain -def my_main(epochs, _run): +def my_main(epochs): # num_epochs = 1000 # num_epochs = int(sys.argv[1]) @@ -153,23 +149,23 @@ def my_main(epochs, _run): # save model torch.save(model, "model.pkl") - predicted = [] - expected = [] + input_example = gross_test_g + siganture = infer_signature(gross_test_g, X_train) + tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme + # print(tracking_url_type_store) - for i in range(0, len(X_test)): - predicted.append(np.argmax(model(X_test[i]).detach().numpy(), axis=0)) - expected.append(gross_test_g[i]) + if tracking_url_type_store != "file": + mlflow.pytorch.log_model(model, "model", registered_model_name="s444018", signature=siganture, + input_example=input_example) + else: + mlflow.pytorch.log_model(model, "model", signature=siganture, input_example=input_example) + mlflow.pytorch.save_model(model, "my_model", 
signature=siganture, input_example=input_example) - for i in range(0, len(expected)): - expected[i] = expected[i][0] - - rmse = mean_squared_error(gross_test_g, pred, squared=False) mse = mean_squared_error(gross_test_g, pred) - _run.log_scalar("RMSE", rmse) - _run.log_scalar("MSE", mse) - _run.info['epochs'] = epochs + mlflow.log_metric("MSE", mse) + mlflow.log_param("epochs", epochs) -# ex.run() -ex.add_artifact("model.pkl") +with mlflow.start_run() as run: + my_main(epochs) \ No newline at end of file diff --git a/predict.py b/predict.py new file mode 100644 index 0000000..e72d0eb --- /dev/null +++ b/predict.py @@ -0,0 +1,16 @@ +import json +import mlflow +import sys +import numpy as np + +#input = sys.argv[1] + +logged_model = 'mlruns/1/70439eb482b54d56b54b0ecc6f1ca96f/artifacts/s444409' +loaded_model = mlflow.pyfunc.load_model(logged_model) + + +with open('input_example.json') as f: + data = json.load(f) + input_example = np.array([data['inputs'][0]], dtype=np.float32) + +print(f'Prediction: {loaded_model.predict(input_example)}') \ No newline at end of file diff --git a/registry.py b/registry.py new file mode 100644 index 0000000..04efb61 --- /dev/null +++ b/registry.py @@ -0,0 +1,15 @@ +import mlflow +import json +import numpy as np +logged_model = '/mlruns/12/1c2b9737c0204b0ca825811c35fb6c64/artifacts/s444409' + +# Load model as a PyFuncModel. +loaded_model = mlflow.pyfunc.load_model(logged_model) + +with open(f'{logged_model}/input_example.json') as f: + data = json.load(f) + input_example = np.array([data['inputs'][0]], dtype=np.float32) + +# Predict on a NumPy array built from the logged input example. +import pandas as pd +print(f'Prediction: {loaded_model.predict(input_example)}') \ No newline at end of file