save model

2022-05-16 01:31:02 +02:00 · 2022-05-16 01:31:02 +02:00 · 7a446f1753
commit 7a446f1753
parent 518f414734
6 changed files with 208 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -154,4 +154,4 @@ fabric.properties
 kaggle.json
 Car_Prices_Poland_Kaggle*
 CarPrices*
-
+IUM08/*
--- a/9
+++ b/9
@ -13,13 +13,18 @@ pipeline {
        }
        stage('Train model with sacred') {
            steps {
-                sh "python3 lab07_sacred.py with 'epochs=$epoch'"
+                sh "python3 lab08_deepLearining_mlflow.py $epoch'"
                archiveArtifacts artifacts: 'games_model.pkl'
                archiveArtifacts artifacts: 'mlruns/**'
                archiveArtifacts artifacts: 'my_model/**'
                sh 'rm -r mlruns'
                sh 'rm -r my_model'
            }
        }
    }
    post {
        success {
-            archiveArtifacts artifacts: 'prediction_results.csv, CarPrices_pytorch_model.pkl, s444507_sacred_FileObserver/**/*.*', followSymlinks: false
+            archiveArtifacts artifacts: 'CarPrices_pytorch_model.pkl, mlruns/**, my_model/**', followSymlinks: false
        }
        always {
            emailext body: "${currentBuild.currentResult}", subject: 's444507-training', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
--- a/28
+++ b/28
@ -0,0 +1,28 @@
 // Declarative Jenkins pipeline: fetch the dataset artifacts, run the
 // sacred-based training script, archive its outputs and report the result.
 pipeline {
    // Every stage runs inside this Docker image.
    agent {
        docker { image 's444507_create_dataset_image:latest' }
    }
    // Build parameter forwarded to the training script as the epoch count.
    parameters {
        string(name: 'epoch', defaultValue: '1000', description: 'Number of epochs to train model.')
    }
    stages {
        stage('Get arifacts') {
            steps {
                // Copy artifacts from the last successful 's444507-create-dataset' build.
                copyArtifacts fingerprintArtifacts: true, projectName: 's444507-create-dataset', selector: lastSuccessful()
            }
        }
        stage('Train model with sacred') {
            steps {
                // Double-quoted Groovy string, so $epoch is interpolated before the shell runs.
                sh "python3 lab07_sacred.py with 'epochs=$epoch'"
            }
        }
    }
    post {
        // On success only: keep the predictions, the pickled model and the sacred observer files.
        success {
            archiveArtifacts artifacts: 'prediction_results.csv, CarPrices_pytorch_model.pkl, s444507_sacred_FileObserver/**/*.*', followSymlinks: false
        }
        // Regardless of outcome: mail the build result.
        always {
            emailext body: "${currentBuild.currentResult}", subject: 's444507-training', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
        }
    }
 }
--- a/14
+++ b/14
@ -1,13 +1,11 @@
-name: tutorial
+name: s444507
-conda_env: conda.yaml #ścieżka do pliku conda.yaml z definicją środowiska
+docker_env:
-
+  image: adamwojdyla
-#docker_env:
+  volumes: ["/mlflow/tmp/mlruns"]
 #  image: mlflow-docker-example-environment
 entry_points:
  main:
    parameters:
-      epochs: {type: float, default: 0.5}
+      epochs: {type: float, default: 1}
-      l1_ratio: {type: float, default: 0.1}
+    command: "python3 lab08_deepLearining_mlflow.py {epochs}"
    command: "python train.py {epochs}"
--- a/lab06_evaluation.py
+++ b/lab06_evaluation.py
@ -14,6 +14,7 @@ import os
 import matplotlib.pyplot as plt
 import json
 class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
@ -27,6 +28,7 @@ class Model(nn.Module):
        x = F.softmax(self.layer3(x))  # To check with the loss function
        return x
 def prepare_labels_features(dataset):
    """ Label make column"""
    le = preprocessing.LabelEncoder()
--- a/lab08_deepLearining_mlflow.py
+++ b/lab08_deepLearining_mlflow.py
@ -0,0 +1,164 @@
 #!/usr/bin/python
 from urllib.parse import urlparse
 import mlflow
 import numpy as np
 import torch
 from torch import nn
 from torch.autograd import Variable
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score, f1_score
 import torch.nn.functional as F
 import pandas as pd
 from sklearn import preprocessing
 import sys
 import logging
 import mlflow
 import mlflow.sklearn
 # Configure root logging at WARNING level.
 logging.basicConfig(level=logging.WARN)
 # Module-level logger named after this module.
 logger = logging.getLogger(__name__)
 # The explicit tracking-server URI is left disabled, so none is set here.
 # mlflow.set_tracking_uri("http://localhost:5000/")
 # Select the MLflow experiment used by this script.
 mlflow.set_experiment("s444507")
 class Model(nn.Module):
    """Three-layer fully connected network with a 5-way softmax output.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 100)
        self.layer2 = nn.Linear(100, 60)
        self.layer3 = nn.Linear(60, 5)

    def forward(self, x):
        """Return softmax scores over the 5 output units.

        Args:
            x: Float tensor whose last dimension has ``input_dim`` entries.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 5 that sums to 1.
        """
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # Pass the dimension explicitly: the implicit-dim form of softmax is
        # deprecated and warns on every call. dim=-1 matches what the implicit
        # form picked for the 1-D and 2-D inputs this script feeds in.
        # NOTE(review): nn.CrossEntropyLoss already applies log-softmax, so
        # training on these probabilities double-normalises -- confirm intent.
        x = F.softmax(self.layer3(x), dim=-1)
        return x
 def load_dataset_raw():
    """Read the unsplit car-prices CSV, keeping five selected columns.

    Returns:
        DataFrame built from column positions 1, 4, 5, 6 and 10 of
        ``./Car_Prices_Poland_Kaggle.csv``.
    """
    wanted_positions = [1, 4, 5, 6, 10]
    raw_frame = pd.read_csv(
        './Car_Prices_Poland_Kaggle.csv',
        sep=',',
        usecols=wanted_positions,
    )
    return raw_frame
 def load_dataset_files():
    """Read the pre-split dev and train CSV files.

    Both files are read with the same options: column positions 1, 4, 5, 6
    and 10 are kept and renamed to the strings '0' through '4'.

    Returns:
        Tuple ``(cars_dev, cars_train)`` of DataFrames.
    """
    read_options = {
        'usecols': [1, 4, 5, 6, 10],
        'sep': ',',
        'names': [str(position) for position in range(5)],
    }
    dev_frame = pd.read_csv('./Car_Prices_Poland_Kaggle_dev.csv', **read_options)
    train_frame = pd.read_csv('./Car_Prices_Poland_Kaggle_train.csv', **read_options)
    return dev_frame, train_frame
 def remove_rows(data_dev, data_train):
    """Keep only rows whose column '0' is one of five selected values.

    Args:
        data_dev: Dev-split DataFrame with a '0' column.
        data_train: Train-split DataFrame with a '0' column.

    Returns:
        Tuple ``(dev, train)`` restricted to rows where column '0' equals
        'audi', 'bmw', 'ford', 'opel' or 'volkswagen'.
    """
    kept_values = ['audi', 'bmw', 'ford', 'opel', 'volkswagen']
    dev_subset = data_dev.loc[data_dev['0'].isin(kept_values)]
    train_subset = data_train.loc[data_train['0'].isin(kept_values)]
    return dev_subset, train_subset
 def prepare_labels_features(dataset):
    """Split a frame into integer-encoded labels and min-max scaled features.

    Column '0' is label-encoded; all remaining columns are scaled with a
    MinMaxScaler. Both encoder and scaler are fitted on ``dataset`` itself.

    Returns:
        Tuple ``(lab, feat)``: encoded labels and the scaled feature matrix.
    """
    label_values = np.array(dataset['0'])
    encoder = preprocessing.LabelEncoder()
    encoder.fit(label_values)
    print(list(encoder.classes_))
    encoded_labels = encoder.transform(label_values)

    raw_features = dataset.drop(columns=['0']).to_numpy()
    scaler = preprocessing.MinMaxScaler()
    scaled_features = scaler.fit_transform(raw_features)
    return encoded_labels, scaled_features
 def my_main(epoch):
    """Train the classifier, log it to MLflow and save model and predictions.

    Loads the dev/train CSV splits, filters and encodes them, trains ``Model``
    on the full training set with Adam for the requested number of epochs,
    logs the ``epochs`` parameter plus accuracy and weighted F1 to MLflow,
    logs/saves the model through ``mlflow.pytorch``, pickles it to
    ``CarPrices_pytorch_model.pkl`` and writes the dev-set predictions to
    ``./prediction_results.csv``.

    Args:
        epoch: Number of training epochs; a value ``int()`` cannot convert
            falls back to 10.
    """
    print("Loading dataset...")
    dev, train = load_dataset_files()
    print("Dataset loaded")
    print("Preparing dataset...")
    dev, train = remove_rows(dev, train)
    labels_train, features_train = prepare_labels_features(train)
    labels_test, features_test = prepare_labels_features(dev)
    print("Dataset prepared")

    # Training
    model = Model(features_train.shape[1])
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_fn = nn.CrossEntropyLoss()

    # number of epochs is parametrized
    try:
        epochs_n = int(epoch)
    except (TypeError, ValueError, OverflowError) as e:
        # Only the errors int() raises for unusable input are handled here.
        print(e)
        print("Setting default epochs value to 10.")
        epochs_n = 10
    print(f"Number of epochs: {epochs_n}")
    mlflow.log_param("epochs", epochs_n)

    print("Starting model training...")
    x_train = torch.from_numpy(features_train).float()
    y_train = torch.from_numpy(labels_train).long()
    # Dedicated loop name so the ``epoch`` argument is not overwritten.
    for epoch_idx in range(1, epochs_n + 1):
        print("Epoch #", epoch_idx)
        y_pred = model(x_train)
        loss = loss_fn(y_pred, y_train)
        print(f"The loss calculated: {loss}")
        # Zero gradients
        optimizer.zero_grad()
        loss.backward()  # Gradients
        optimizer.step()  # Update
    print("Model training finished")

    x_test = torch.from_numpy(features_test).float()
    # Inference only: no autograd graph is needed for the predictions.
    with torch.no_grad():
        pred = model(x_test).numpy()
    predicted_classes = np.argmax(pred, axis=1)
    accuracy = accuracy_score(labels_test, predicted_classes)
    print(f"The accuracy metric is: {accuracy}")
    f1 = f1_score(labels_test, predicted_classes, average='weighted')
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1", f1)

    # Infer model signature to log it
    signature = mlflow.models.signature.infer_signature(features_train, labels_train)
    # A handful of rows is enough as an input example; passing the whole
    # training matrix would store all of it alongside the model.
    input_example = features_train[:5]
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    if tracking_url_type_store != "file":
        # Non-file tracking stores also register the model under a name.
        mlflow.pytorch.log_model(model, "model", registered_model_name="s444507", signature=signature,
                                 input_example=input_example)
    else:
        mlflow.pytorch.log_model(model, "model", signature=signature, input_example=input_example)
        mlflow.pytorch.save_model(model, "my_model", signature=signature, input_example=input_example)

    print("Saving model to file...")
    torch.save(model, "CarPrices_pytorch_model.pkl")
    print("Model saved with name: CarPrices_pytorch_model.pkl")
    # Reload the file just written and print the class predicted for the
    # first dev sample as a smoke test.
    # NOTE(review): torch.load unpickles whole objects; newer torch releases
    # may need weights_only=False here -- confirm against the pinned version.
    saved_model = torch.load("CarPrices_pytorch_model.pkl")
    print(np.argmax(saved_model(x_test[0]).detach().numpy(), axis=0))
    pd_predictions = pd.DataFrame(pred)
    pd_predictions.to_csv("./prediction_results.csv")
 # Script entry: read the optional epoch count from the first CLI argument,
 # then train inside a single MLflow run.
 DEFAULT_EPOCHS = 100
 try:
    epochs = int(sys.argv[1])
 except (IndexError, ValueError) as e:
    # IndexError: no argument given; ValueError: argument is not an integer.
    print(e)
    # The message is built from the constant so it cannot drift from the
    # value actually used (it previously announced 1000 while using 100).
    print(f"Setting default epochs value to {DEFAULT_EPOCHS}.")
    epochs = DEFAULT_EPOCHS
 with mlflow.start_run() as run:
    print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
    print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
    my_main(epochs)