add solution for lab8

2022-05-16 01:58:32 +02:00 · 2022-05-16 01:58:32 +02:00 · d572509234
commit d572509234
parent fa51d4a87a
7 changed files with 108 additions and 29 deletions
--- a/3
+++ b/3
@ -13,6 +13,7 @@ RUN pip3 install matplotlib
 RUN pip3 install torch
 RUN pip3 install sacred
 RUN pip3 install pymongo
+RUN pip3 install mlflow

 ARG CUTOFF
 ARG KAGGLE_USERNAME
@ -27,6 +28,8 @@ COPY lab2/download.sh .
 COPY biblioteka_DL/dllib.py .
 COPY biblioteka_DL/evaluate.py .
 COPY biblioteka_DL/imdb_top_1000.csv .
+COPY predict.py .
+COPY registry.py .

 RUN chmod +x ./download.sh
 RUN ./download.sh
--- a/29
+++ b/29
@ -0,0 +1,29 @@
+pipeline {
+    agent {
+        docker {
+            image 'docker_image'
+        }
+    }
+    parameters {
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts for predict',
+            name: 'BUILD_SELECTOR')
+        string(
+            defaultValue: '{\\"inputs\\": [900.0]}',  // quotes are pre-escaped because the value is pasted into a shell command below
+            description: 'Input file',
+            name: 'INPUT',
+            trim: true
+        )
+    }
+
+    stages {
+        stage('Script') {
+            steps {
+                copyArtifacts projectName: 's444409-training/main', selector: buildParameter('BUILD_SELECTOR')  // copy the selected build's artifacts into the workspace
+                sh "echo ${params.INPUT} > input_example.json"  // NOTE(review): INPUT is Groovy-interpolated and then shell-parsed; keep this job away from untrusted input
+                sh "python3 predict.py"  // python3, matching the other pipelines and the pip3-installed packages
+            }
+        }
+    }
+}
--- a/16
+++ b/16
@ -0,0 +1,16 @@
+pipeline {
+    agent {
+        docker {
+            image 'docker_image'
+            args '-v /mlruns:/mlruns'  // bind-mount the host's /mlruns at the same path inside the container
+        }
+    }
+
+    stages {
+        stage('Script') {
+            steps {
+                sh 'python3 ./registry.py'
+            }
+        }
+    }
+}
--- a/12
+++ b/12
@ -1,9 +1,10 @@
 pipeline {
  	agent {
-      dockerfile {
-			 additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} --build-arg CUTOFF=${params.CUTOFF} -t docker_image"
-		}
-	}
+		docker {
+		    image 'docker_image'
+			args '-v /mlruns:/mlruns'
+        	}
+	    }
 	parameters {
    string(
            defaultValue: '1000',
@ -22,6 +23,9 @@ pipeline {
 	    steps {
 	        sh 'python3 ./biblioteka_DL/dllib.py with "epochs=$EPOCHS"'
            archiveArtifacts artifacts: 'model.pkl, s444018_sacred_FileObserver/**/*.*, result.csv', followSymlinks: false
+            archiveArtifacts artifacts: 'mlruns/**'
+			archiveArtifacts artifacts: 'my_model/**'
+            build job: 's444018-evaluation/master/'
           }
        }
  }
--- a/biblioteka_DL/dllib.py
+++ b/biblioteka_DL/dllib.py
@ -1,25 +1,22 @@
 import sys

 import torch
+import mlflow
 import torch.nn as nn
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
+from mlflow.models import infer_signature
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score, mean_squared_error
 from sacred.observers import MongoObserver, FileStorageObserver
 from sacred import Experiment
+from urllib.parse import urlparse

+# mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444018")

-ex = Experiment(save_git_info=False)
-ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
-                                  db_name='sacred'))
-
-ex.observers.append(FileStorageObserver('s444018_sacred_FileObserver'))
-
-@ex.config
-def my_config():
-    epochs = "1000"
+def parse_epochs(argv, default=20):
+    """Return the number of training epochs requested on the command line.
+
+    Accepts both a bare number (``dllib.py 1000``) and the sacred-style form
+    that the training pipeline still passes (``dllib.py with "epochs=1000"``).
+
+    Args:
+        argv: Full argument vector, program name first (e.g. ``sys.argv``).
+        default: Value returned when no epoch argument is present.
+
+    Returns:
+        The epoch count as an ``int``.
+
+    Raises:
+        ValueError: If the epoch argument is not an integer.
+    """
+    prefix = "epochs="
+    for arg in argv[1:]:
+        if arg == "with":  # sacred's config-update keyword carries no value
+            continue
+        return int(arg[len(prefix):] if arg.startswith(prefix) else arg)
+    return default
+
+
+epochs = parse_epochs(sys.argv)


 def drop_relevant_columns(imbd_data):
@ -88,8 +85,7 @@ class LinearRegressionModel(torch.nn.Module):
        return y_pred


-@ex.automain
-def my_main(epochs, _run):
+def my_main(epochs):
    # num_epochs = 1000
    # num_epochs = int(sys.argv[1])

@ -153,23 +149,23 @@ def my_main(epochs, _run):
    # save model
    torch.save(model, "model.pkl")

-    predicted = []
-    expected = []
+    input_example = gross_test_g
+    siganture = infer_signature(gross_test_g, X_train)
+    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
+    # print(tracking_url_type_store)

-    for i in range(0, len(X_test)):
-        predicted.append(np.argmax(model(X_test[i]).detach().numpy(), axis=0))
-        expected.append(gross_test_g[i])
+    if tracking_url_type_store != "file":
+        mlflow.pytorch.log_model(model, "model", registered_model_name="s444018", signature=siganture,
+                                 input_example=input_example)
+    else:
+        mlflow.pytorch.log_model(model, "model", signature=siganture, input_example=input_example)
+        mlflow.pytorch.save_model(model, "my_model", signature=siganture, input_example=input_example)

-    for i in range(0, len(expected)):
-        expected[i] = expected[i][0]
-
-    rmse = mean_squared_error(gross_test_g, pred, squared=False)
    mse = mean_squared_error(gross_test_g, pred)

-    _run.log_scalar("RMSE", rmse)
-    _run.log_scalar("MSE", mse)
-    _run.info['epochs'] = epochs
+    mlflow.log_param("MSE", mse)
+    mlflow.log_param("epochs", epochs)

-# ex.run()
-ex.add_artifact("model.pkl")

+with mlflow.start_run() as run:  # one MLflow run wraps the whole training, so my_main's mlflow.log_* calls land in it
+    my_main(epochs)
--- a/predict.py
+++ b/predict.py
@ -0,0 +1,16 @@
+import json
+import mlflow
+import sys
+import numpy as np
+
+# Relative path of the logged MLflow model used for the prediction.
+logged_model = 'mlruns/1/70439eb482b54d56b54b0ecc6f1ca96f/artifacts/s444409'
+loaded_model = mlflow.pyfunc.load_model(logged_model)
+
+# The input file holds a JSON object with an "inputs" list; only its first entry is used.
+with open('input_example.json') as f:
+    data = json.load(f)
+
+input_example = np.array([data['inputs'][0]], dtype=np.float32)
+prediction = loaded_model.predict(input_example)
+print(f'Prediction: {prediction}')
--- a/registry.py
+++ b/registry.py
@ -0,0 +1,15 @@
+import json
+import mlflow
+import numpy as np
+import pandas as pd
+
+logged_model = '/mlruns/12/1c2b9737c0204b0ca825811c35fb6c64/artifacts/s444409'
+loaded_model = mlflow.pyfunc.load_model(logged_model)  # loaded as a generic PyFuncModel
+
+# Read the example stored alongside the model; only the first "inputs" entry is used.
+with open(f'{logged_model}/input_example.json') as f:
+    data = json.load(f)
+    input_example = np.array([data['inputs'][0]], dtype=np.float32)
+
+# Predict on a one-element float32 NumPy array (not a DataFrame).
+print(f'Prediction: {loaded_model.predict(input_example)}')