add solution for lab8

2022-05-16 01:58:32 +02:00 · 2022-05-16 01:58:32 +02:00 · d572509234
commit d572509234
parent fa51d4a87a
7 changed files with 108 additions and 29 deletions
--- a/3
+++ b/3
@ -13,6 +13,7 @@ RUN pip3 install matplotlib
 RUN pip3 install torch
 RUN pip3 install sacred
 RUN pip3 install pymongo
 # MLflow is imported by dllib.py, predict.py and registry.py; the PyPI
 # package providing that module is named "mlflow" (not "mflow").
 RUN pip3 install mlflow
 ARG CUTOFF
 ARG KAGGLE_USERNAME
@ -27,6 +28,8 @@ COPY lab2/download.sh .
 COPY biblioteka_DL/dllib.py .
 COPY biblioteka_DL/evaluate.py .
 COPY biblioteka_DL/imdb_top_1000.csv .
 COPY predict.py .
 COPY registry.py .
 RUN chmod +x ./download.sh
 RUN ./download.sh
--- a/29
+++ b/29
@ -0,0 +1,29 @@
 // Declarative pipeline: copies artifacts from a selected build of
 // 's444409-training/main', writes the INPUT parameter to input_example.json
 // and runs predict.py inside the 'docker_image' container.
 pipeline {
    agent {
        docker {
 			image 'docker_image'
 		}
    }
    parameters {
        // Selects which upstream build the artifacts are copied from;
        // defaults to the last successful one.
        buildSelector(
          defaultSelector: lastSuccessful(),
          description: 'Which build to use for copying artifacts for predict',
          name: 'BUILD_SELECTOR')
        // JSON payload that the Script stage echoes into input_example.json.
        // NOTE(review): the double quotes are backslash-escaped, presumably so
        // they survive the shell `echo` below — confirm before changing either.
        string(
            defaultValue: '{\\"inputs\\": [900.0]}',
            description: 'Input file',
            name: 'INPUT',
            trim: true
        )
    }
    stages {
        stage('Script') {
            steps {
                // Fetch the artifacts of the build chosen through BUILD_SELECTOR.
                copyArtifacts projectName: 's444409-training/main', selector: buildParameter('BUILD_SELECTOR')
 		        // params.INPUT is interpolated by Groovy into the command string
 		        // before the shell runs it.
 		        sh "echo ${params.INPUT} > input_example.json"
 		        // predict.py reads input_example.json from the working directory.
 		        sh "python predict.py"
            }
        }
    }
 }
--- a/16
+++ b/16
@ -0,0 +1,16 @@
 // Declarative pipeline: runs registry.py inside the 'docker_image' container.
 pipeline {
    agent {
 	docker {
       		image 'docker_image'
       		// Bind-mount the host directory /mlruns at /mlruns in the container;
       		// registry.py loads its model from a path under /mlruns.
       		args '-v /mlruns:/mlruns'
 	}
    }
    stages {
        stage('Script') {
            steps {
 		sh 'python3 ./registry.py'
            }
        }
    }
 }
--- a/12
+++ b/12
@ -1,9 +1,10 @@
 pipeline {
  	agent {
-      dockerfile {
+		docker {
-			 additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} --build-arg CUTOFF=${params.CUTOFF} -t docker_image"
+		    image 'docker_image'
-		}
+			args '-v /mlruns:/mlruns'
-	}
+        	}
 	    }
 	parameters {
    string(
            defaultValue: '1000',
@ -22,6 +23,9 @@ pipeline {
 	    steps {
 	        sh 'python3 ./biblioteka_DL/dllib.py with "epochs=$EPOCHS"'
            archiveArtifacts artifacts: 'model.pkl, s444018_sacred_FileObserver/**/*.*, result.csv', followSymlinks: false
            archiveArtifacts artifacts: 'mlruns/**'
 			archiveArtifacts artifacts: 'my_model/**'
            build job: 's444018-evaluation/master/'
           }
        }
  }
--- a/biblioteka_DL/dllib.py
+++ b/biblioteka_DL/dllib.py
@ -1,25 +1,22 @@
 import sys
 import torch
 import mlflow
 import torch.nn as nn
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
 from mlflow.models import infer_signature
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score, mean_squared_error
 from sacred.observers import MongoObserver, FileStorageObserver
 from sacred import Experiment
 from urllib.parse import urlparse
 # mlflow.set_tracking_uri("http://172.17.0.1:5000")
 mlflow.set_experiment("s444018")
-ex = Experiment(save_git_info=False)
+epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 20
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
                                  db_name='sacred'))
 ex.observers.append(FileStorageObserver('s444018_sacred_FileObserver'))
@ex.config
 def my_config():
    epochs = "1000"
 def drop_relevant_columns(imbd_data):
@ -88,8 +85,7 @@ class LinearRegressionModel(torch.nn.Module):
        return y_pred
-@ex.automain
+def my_main(epochs):
 def my_main(epochs, _run):
    # num_epochs = 1000
    # num_epochs = int(sys.argv[1])
@ -153,23 +149,23 @@ def my_main(epochs, _run):
    # save model
    torch.save(model, "model.pkl")
-    predicted = []
+    input_example = gross_test_g
-    expected = []
+    siganture = infer_signature(gross_test_g, X_train)
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    # print(tracking_url_type_store)
-    for i in range(0, len(X_test)):
+    if tracking_url_type_store != "file":
-        predicted.append(np.argmax(model(X_test[i]).detach().numpy(), axis=0))
+        mlflow.pytorch.log_model(model, "model", registered_model_name="s444018", signature=siganture,
-        expected.append(gross_test_g[i])
+                                 input_example=input_example)
    else:
        mlflow.pytorch.log_model(model, "model", signature=siganture, input_example=input_example)
        mlflow.pytorch.save_model(model, "my_model", signature=siganture, input_example=input_example)
    for i in range(0, len(expected)):
        expected[i] = expected[i][0]
    rmse = mean_squared_error(gross_test_g, pred, squared=False)
    mse = mean_squared_error(gross_test_g, pred)
-    _run.log_scalar("RMSE", rmse)
+    mlflow.log_param("MSE", mse)
-    _run.log_scalar("MSE", mse)
+    mlflow.log_param("epochs", epochs)
    _run.info['epochs'] = epochs
 # ex.run()
 ex.add_artifact("model.pkl")
 with mlflow.start_run() as run:
    my_main(epochs)
--- a/predict.py
+++ b/predict.py
@ -0,0 +1,16 @@
 import json
 import mlflow
 import sys
 import numpy as np
 #input = sys.argv[1]
 # Script body: load a logged MLflow model and print its prediction for the
 # first value listed under "inputs" in input_example.json.
 # NOTE(review): the model location is a hard-coded, run-specific path relative
 # to the working directory — confirm it exists wherever this script runs.
 logged_model = 'mlruns/1/70439eb482b54d56b54b0ecc6f1ca96f/artifacts/s444409'
 # Load the model through MLflow's generic pyfunc interface.
 loaded_model = mlflow.pyfunc.load_model(logged_model)
 # input_example.json must hold a JSON object with a non-empty "inputs" list.
 with open('input_example.json') as f:
    data = json.load(f)
    # Only the first entry of "inputs" is used; it is wrapped in a
    # one-element float32 array.
    input_example = np.array([data['inputs'][0]], dtype=np.float32)
 print(f'Prediction: {loaded_model.predict(input_example)}')
--- a/registry.py
+++ b/registry.py
@ -0,0 +1,15 @@
 import mlflow
 import json
 import numpy as np
 # Script body: load a logged MLflow model and print its prediction for the
 # input example stored next to the model.
 # NOTE(review): hard-coded absolute, run-specific model path — confirm it
 # exists in the environment where this script runs.
 logged_model = '/mlruns/12/1c2b9737c0204b0ca825811c35fb6c64/artifacts/s444409'
 # Load model as a PyFuncModel.
 loaded_model = mlflow.pyfunc.load_model(logged_model)
 # Read input_example.json from inside the model directory.
 with open(f'{logged_model}/input_example.json') as f:
    data = json.load(f)
    # Only the first entry of "inputs" is used; it is wrapped in a
    # one-element float32 array.
    input_example = np.array([data['inputs'][0]], dtype=np.float32)
 # Predict on the NumPy array built above (pandas is imported here but is not
 # used in the visible lines of this script).
 import pandas as pd
 print(f'Prediction: {loaded_model.predict(input_example)}')