From d572509234c56266094db6ea1e179cdd50590072 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Szymon=20Parafin=CC=81ski?= <sparafinski@fandom.com>
Date: Mon, 16 May 2022 01:58:32 +0200
Subject: [PATCH] add solution for lab8

---
 Dockerfile             |  3 +++
 Jenkinsfile_predict    | 29 ++++++++++++++++++++++++++
 Jenkinsfile_registry   | 16 +++++++++++++++
 Jenkinsfile_train      | 12 +++++++----
 biblioteka_DL/dllib.py | 46 +++++++++++++++++++-----------------------
 predict.py             | 16 +++++++++++++++
 registry.py            | 15 ++++++++++++++
 7 files changed, 108 insertions(+), 29 deletions(-)
 create mode 100644 Jenkinsfile_predict
 create mode 100644 Jenkinsfile_registry
 create mode 100644 predict.py
 create mode 100644 registry.py

diff --git a/Dockerfile b/Dockerfile
index 75ff4a5..2af5a66 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -13,6 +13,7 @@ RUN pip3 install matplotlib
 RUN pip3 install torch
 RUN pip3 install sacred
 RUN pip3 install pymongo
+RUN pip3 install mlflow
 
 ARG CUTOFF
 ARG KAGGLE_USERNAME
@@ -27,6 +28,8 @@ COPY lab2/download.sh .
 COPY biblioteka_DL/dllib.py .
 COPY biblioteka_DL/evaluate.py .
 COPY biblioteka_DL/imdb_top_1000.csv .
+COPY predict.py .
+COPY registry.py .
 
 RUN chmod +x ./download.sh
 RUN ./download.sh
diff --git a/Jenkinsfile_predict b/Jenkinsfile_predict
new file mode 100644
index 0000000..13def17
--- /dev/null
+++ b/Jenkinsfile_predict
@@ -0,0 +1,29 @@
+pipeline {
+    agent {
+        docker {
+            image 'docker_image'
+        }
+    }
+    parameters {
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts for predict',
+            name: 'BUILD_SELECTOR')
+        string(
+            defaultValue: '{"inputs": [900.0]}',
+            description: 'JSON input for predict.py (plain JSON, no shell escaping needed)',
+            name: 'INPUT',
+            trim: true
+        )
+    }
+
+    stages {
+        stage('Script') {
+            steps {
+                copyArtifacts projectName: 's444409-training/main', selector: buildParameter('BUILD_SELECTOR')
+                writeFile file: 'input_example.json', text: params.INPUT  // written verbatim; user input never reaches a shell
+                sh 'python3 ./predict.py'  // predict.py reads input_example.json from the workspace
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Jenkinsfile_registry b/Jenkinsfile_registry
new file mode 100644
index 0000000..7373578
--- /dev/null
+++ b/Jenkinsfile_registry
@@ -0,0 +1,16 @@
+pipeline {
+    agent {
+        docker {
+            image 'docker_image'
+            args '-v /mlruns:/mlruns'  // registry.py loads its model from the absolute path /mlruns/...
+        }
+    }
+
+    stages {
+        stage('Script') {
+            steps {
+                sh 'python3 ./registry.py'  // loads the logged model and prints one prediction
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Jenkinsfile_train b/Jenkinsfile_train
index 607503d..eb9bdbf 100644
--- a/Jenkinsfile_train
+++ b/Jenkinsfile_train
@@ -1,9 +1,10 @@
 pipeline {
   	agent {
-      dockerfile {
-			 additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} --build-arg CUTOFF=${params.CUTOFF} -t docker_image"
-		}
-	}
+		docker {
+		    image 'docker_image'
+			args '-v /mlruns:/mlruns'
+        	}
+	    }
 	parameters {
     string(
             defaultValue: '1000',
@@ -22,6 +23,9 @@ pipeline {
 	    steps {
 	        sh 'python3 ./biblioteka_DL/dllib.py with "epochs=$EPOCHS"'
             archiveArtifacts artifacts: 'model.pkl, s444018_sacred_FileObserver/**/*.*, result.csv', followSymlinks: false
+            archiveArtifacts artifacts: 'mlruns/**'
+			archiveArtifacts artifacts: 'my_model/**'
+            build job: 's444018-evaluation/master/'
            }
         }
   }
diff --git a/biblioteka_DL/dllib.py b/biblioteka_DL/dllib.py
index 48964cf..5b6ef99 100644
--- a/biblioteka_DL/dllib.py
+++ b/biblioteka_DL/dllib.py
@@ -1,25 +1,22 @@
 import sys
 
 import torch
+import mlflow
 import torch.nn as nn
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
+from mlflow.models import infer_signature
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score, mean_squared_error
 from sacred.observers import MongoObserver, FileStorageObserver
 from sacred import Experiment
+from urllib.parse import urlparse
 
+# mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444018")
 
-ex = Experiment(save_git_info=False)
-ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
-                                  db_name='sacred'))
-
-ex.observers.append(FileStorageObserver('s444018_sacred_FileObserver'))
-
-@ex.config
-def my_config():
-    epochs = "1000"
+epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 20
 
 
 def drop_relevant_columns(imbd_data):
@@ -88,8 +85,7 @@ class LinearRegressionModel(torch.nn.Module):
         return y_pred
 
 
-@ex.automain
-def my_main(epochs, _run):
+def my_main(epochs):
     # num_epochs = 1000
     # num_epochs = int(sys.argv[1])
 
@@ -153,23 +149,23 @@ def my_main(epochs, _run):
     # save model
     torch.save(model, "model.pkl")
 
-    predicted = []
-    expected = []
+    input_example = gross_test_g
+    signature = infer_signature(gross_test_g, X_train)  # NOTE(review): confirm these are the model's real input/output
+    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
+    # print(tracking_url_type_store)
 
-    for i in range(0, len(X_test)):
-        predicted.append(np.argmax(model(X_test[i]).detach().numpy(), axis=0))
-        expected.append(gross_test_g[i])
+    if tracking_url_type_store != "file":  # non-file tracking store: also register the model by name
+        mlflow.pytorch.log_model(model, "model", registered_model_name="s444018", signature=signature,
+                                 input_example=input_example)
+    else:
+        mlflow.pytorch.log_model(model, "model", signature=signature, input_example=input_example)
+        mlflow.pytorch.save_model(model, "my_model", signature=signature, input_example=input_example)
 
-    for i in range(0, len(expected)):
-        expected[i] = expected[i][0]
-
-    rmse = mean_squared_error(gross_test_g, pred, squared=False)
     mse = mean_squared_error(gross_test_g, pred)
 
-    _run.log_scalar("RMSE", rmse)
-    _run.log_scalar("MSE", mse)
-    _run.info['epochs'] = epochs
+    mlflow.log_metric("MSE", mse)  # a result of the run, so it is a metric rather than a parameter
+    mlflow.log_param("epochs", epochs)
 
-# ex.run()
-ex.add_artifact("model.pkl")
 
+with mlflow.start_run() as run:
+    my_main(epochs)
\ No newline at end of file
diff --git a/predict.py b/predict.py
new file mode 100644
index 0000000..e72d0eb
--- /dev/null
+++ b/predict.py
@@ -0,0 +1,16 @@
+import json
+import mlflow
+import sys
+import numpy as np
+
+# Model path: first CLI argument if given, otherwise the hard-coded run artifact below.
+DEFAULT_MODEL = 'mlruns/1/70439eb482b54d56b54b0ecc6f1ca96f/artifacts/s444409'
+logged_model = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_MODEL
+loaded_model = mlflow.pyfunc.load_model(logged_model)
+
+
+with open('input_example.json') as f:
+    data = json.load(f)
+    input_example = np.array([data['inputs'][0]], dtype=np.float32)  # only the first entry of "inputs" is used
+
+print(f'Prediction: {loaded_model.predict(input_example)}')
\ No newline at end of file
diff --git a/registry.py b/registry.py
new file mode 100644
index 0000000..04efb61
--- /dev/null
+++ b/registry.py
@@ -0,0 +1,15 @@
+import json
+import mlflow
+import numpy as np
+import pandas as pd
+
+logged_model = '/mlruns/12/1c2b9737c0204b0ca825811c35fb6c64/artifacts/s444409'
+# Load model as a PyFuncModel.
+loaded_model = mlflow.pyfunc.load_model(logged_model)
+
+with open(f'{logged_model}/input_example.json') as f:
+    data = json.load(f)
+    input_example = np.array([data['inputs'][0]], dtype=np.float32)
+
+# Predict on a float32 NumPy array built from the first entry of the stored input example.
+print(f'Prediction: {loaded_model.predict(input_example)}')
\ No newline at end of file