Compare commits


32 Commits (sacred...main)

SHA1 Message Date
d494e86141 Add eval dependency 2022-06-05 22:30:54 +02:00
fb3fb60b49 Remove out files 2022-06-05 22:28:52 +02:00
04e0f99981 Add missing data 2022-06-05 22:25:53 +02:00
114109ecf6 DVC Fixes 2022-06-05 22:19:45 +02:00
3f7b2ec7cf Add Jenkinsfile-dvc 2022-06-05 22:03:25 +02:00
8d19b52c10 Add remote 2022-06-05 22:00:22 +02:00
73b627f6ee Add dvc.yaml 2022-06-05 21:58:57 +02:00
7125eeac61 Add data folder to DVC 2022-06-05 21:42:59 +02:00
e06503513e DVC init 2022-06-05 21:40:34 +02:00
6f82712788 Added conda environment file 2022-05-21 13:38:49 +02:00
a0ca27411a Escape quotes in input 2022-05-11 21:35:54 +02:00
6f99aa9f06 Update hash 2022-05-11 21:14:48 +02:00
24d2eb83cb Add input param 2022-05-11 21:11:05 +02:00
cfab6beb44 Add from-registry job 2022-05-11 20:50:59 +02:00
e9d1153a35 Revert file parameter 2022-05-11 20:47:50 +02:00
36e6ac8270 Fix file param 2022-05-11 20:45:18 +02:00
ad21010147 Use build selector 2022-05-11 20:30:33 +02:00 (all checks successful)
8874950fa6 Use file parameter 2022-05-11 20:24:02 +02:00
1679fac800 Log to artifacts 2022-05-11 20:05:31 +02:00 (all checks successful)
e93d644059 Log to url 2022-05-11 20:04:13 +02:00 (all checks successful)
aa966f8960 Log model.py as artifact 2022-05-11 19:53:26 +02:00 (all checks successful)
8a967075c4 Change model path 2022-05-11 19:12:25 +02:00
3fa8d9b448 Floatify 2022-05-11 19:10:26 +02:00 (all checks successful)
5db367b894 Add scripts to use someones model 2022-05-11 19:03:01 +02:00
39acc4bd27 Add scripts to use someones model 2022-05-11 18:33:30 +02:00
4ebdc06dbe Fix mountpoint 2022-05-11 17:56:50 +02:00 (all checks successful)
4f5f887456 Add tracking to MLFlow Models 2022-05-09 18:36:44 +02:00
858e9ec215 Archive MLFlow artifacts 2022-05-09 18:23:12 +02:00 (all checks successful)
60a565098c Add MLproject for MLFlow 2022-05-09 17:14:37 +02:00
d64d3c1d5a Improved caching layers 2022-05-09 17:04:58 +02:00
6a01cf5307 Mount /tmp/mlruns while training 2022-05-09 14:42:38 +02:00 (s444409-training/pipeline/head failed)
66c7e1c583 Initial MLFlow setup 2022-05-09 13:57:15 +02:00 (s444409-training/pipeline/head failed)
20 changed files with 247 additions and 40 deletions

.dvc/.gitignore (new file)

@@ -0,0 +1,3 @@
/config.local
/tmp
/cache

.dvc/config (new file)

@@ -0,0 +1,4 @@
[core]
    remote = ium_ssh_remote
['remote "ium_ssh_remote"']
    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
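
For reference, the committed half of this config is what the DVC CLI generates when a default remote is added (the 'Add remote' commit); roughly:

    dvc remote add ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
    dvc remote default ium_ssh_remote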

.dvcignore (new file)

@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore

.gitignore

@@ -20,3 +20,5 @@ evaluation_results.txt
 model_out
 trend.png
 sacred_runs
+mlruns
+/data

Dockerfile

@@ -5,11 +5,13 @@ WORKDIR /app
 ADD ./requirements.txt .
 RUN pip install -r requirements.txt
-ADD . .
+ADD ./download_dataset.sh .
 ARG KAGGLE_USERNAME
 ARG KAGGLE_KEY
 RUN chmod a+x *.sh
 RUN ./download_dataset.sh
+ADD . .
 CMD python train_model.py
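
The reordering is the point of the 'Improved caching layers' commit: with ADD . . moved below the dependency install and dataset download, source-only changes no longer invalidate those layers. Built outside Jenkins, the image the Jenkinsfiles reference would be created along these lines (credential values are placeholders):

    docker build \
      --build-arg KAGGLE_USERNAME=<your_kaggle_username> \
      --build-arg KAGGLE_KEY=<your_kaggle_key> \
      -t s444409-create-dataset .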

Jenkinsfile-dvc (new file)

@@ -0,0 +1,41 @@
pipeline {
    parameters {
        string(
            defaultValue: 'marcinkostrzewski',
            description: 'Kaggle username',
            name: 'KAGGLE_USERNAME',
            trim: false
        )
        password(
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            name: 'KAGGLE_KEY'
        )
    }
    agent {
        dockerfile {
            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s444409-create-dataset"
        }
    }
    stages {
        stage("Run DVC") {
            steps {
                withCredentials([
                    sshUserPrivateKey(
                        credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18',
                        keyFileVariable: 'IUM_SFTP_KEY',
                        passphraseVariable: '',
                        usernameVariable: 'USER'
                    )
                ]) {
                    sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
                    sh 'dvc remote modify --local ium_ssh_remote password IUM@2021'
                    sh 'dvc pull'
                    sh 'dvc repro'
                }
            }
        }
    }
}
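
The two dvc remote modify --local calls write the SSH credentials to .dvc/config.local, which the .dvc/.gitignore added above keeps out of the repository, so only the non-secret URL lives in the committed .dvc/config. The same setup can be reproduced outside Jenkins (the keyfile path here is a placeholder):

    dvc remote modify --local ium_ssh_remote keyfile ~/.ssh/ium_sftp_key
    dvc remote modify --local ium_ssh_remote password IUM@2021
    dvc pull     # fetch data tracked in data.dvc from the remote
    dvc repro    # re-run the stages defined in dvc.yaml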

@@ -0,0 +1,34 @@
pipeline {
    agent {
        docker {
            image 's444409-create-dataset'
        }
    }
    parameters {
        string(
            defaultValue: '{\\"inputs\\": [[0.51, 0.86], [0.79, 0.79], [0.74, 0.77], [0.66, 0.73]]}',
            description: 'Input',
            name: 'INPUT',
            trim: true
        )
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR'
        )
    }
    stages {
        stage('Get dataset from artifact') {
            steps {
                copyArtifacts projectName: 's444356-training/master', selector: buildParameter('BUILD_SELECTOR')
            }
        }
        stage('Predict values using model from artifact') {
            steps {
                sh "echo ${params.INPUT} > input_example.json"
                sh "python predict_s444356.py"
            }
        }
    }
}

@@ -0,0 +1,16 @@
pipeline {
    agent {
        docker {
            image 's444409-create-dataset'
            args '-v /mlruns:/mlruns'
        }
    }
    stages {
        stage('Predict values using model from artifact') {
            steps {
                sh "python predict_s444356-from-registry.py"
            }
        }
    }
}

@@ -23,6 +23,7 @@ pipeline {
     agent {
         docker {
             image 's444409-create-dataset'
+            args '-v /mlruns:/mlruns'
         }
     }
@@ -34,13 +35,14 @@ pipeline {
         }
         stage('Train model') {
             steps {
-                sh "python train_model.py with 'epochs=${params.EPOCHS}' 'batch_size=${params.BATCHSIZE}'"
+                sh "python train_model.py -e ${params.EPOCHS} -b ${params.BATCHSIZE}"
+                archiveArtifacts artifacts: 'model_out', onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'mlruns/**', onlyIfSuccessful: true
+                sh 'rm -r mlruns'
             }
         }
-        stage('Archive model and evaluate it') {
+        stage('Evaluate model') {
             steps {
-                archiveArtifacts artifacts: 'model_out', onlyIfSuccessful: true
-                archiveArtifacts artifacts: 'sacred_runs/**', onlyIfSuccessful: true
                 build job: "s444409-evaluation/${params.BRANCH}/", parameters: [string(name: 'BRANCH', value: "${params.BRANCH}")]
             }
         }

MLproject (new file)

@@ -0,0 +1,14 @@
name: ium_s444409

docker_env:
  image: s444409-create-dataset
  volumes: ["/tmp/mlruns:/mlruns:rw"]

entry_points:
  main:
    parameters:
      epochs: {type: float, default: 5}
      batch_size: {type: float, default: 64}
    command: "python train_model.py -e {epochs} -b {batch_size}"
  eval:
    command: "python eval_model.py"

data.dvc (new file)

@@ -0,0 +1,5 @@
outs:
- md5: 107d31dc52f58274023d18db2f0c5b7a.dir
  size: 24456416
  nfiles: 17
  path: data
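
This pointer file is what dvc add leaves in git in place of the 17 data files themselves (the 'Add data folder to DVC' commit); a typical sequence to produce and publish it:

    dvc add data                  # hash the folder and write data.dvc
    git add data.dvc .gitignore   # /data itself is gitignored
    dvc push                      # upload the cached contents to ium_ssh_remote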

dvc.lock (new file)

@@ -0,0 +1,4 @@
schema: '2.0'
stages:
  prepare:
    cmd: download_dataset.sh

dvc.yaml (new file)

@@ -0,0 +1,16 @@
stages:
  prepare:
    cmd: download_dataset.sh
  train:
    cmd: python3 train_model.py
    deps:
    - data/Plant_1_Generation_data.csv.train
    params:
    - batch_size
    - epochs
  eval:
    deps:
    - model_out
    cmd: python3 eval_model.py
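
dvc repro (the final step in Jenkinsfile-dvc) walks these stages and re-runs only the ones whose deps or params changed, recording the results in dvc.lock:

    dvc repro          # run prepare -> train -> eval as needed
    dvc repro train    # target a single stage
    dvc status         # list stages that are out of date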

environment.yml (new file)

@@ -0,0 +1,11 @@
name: ium444409
channels:
  - conda-forge
  - defaults
dependencies:
  - kaggle
  - pandas
  - pytorch
  - matplotlib
  - mlflow
prefix: C:\Users\komar\.conda\envs\ium444409
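
The environment can be recreated from this file; the prefix line is a machine-specific Windows path and is ignored when the environment is created by name:

    conda env create -f environment.yml
    conda activate ium444409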

model.py

@@ -24,7 +24,7 @@ class MLP(nn.Module):
     def forward(self, x):
         x = x.view(x.size(0), -1)
-        return self.layers(x)
+        return self.layers(x.float())

 class PlantsDataset(Dataset):

params.yaml (new file)

@@ -0,0 +1,2 @@
batch_size: 64
epochs: 5
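
The bare batch_size and epochs entries under params: in dvc.yaml resolve against this params.yaml by default, so editing either value invalidates the train stage; the change can be inspected with:

    dvc params diff    # workspace params vs. last committed values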

predict_s444356-from-registry.py (new file)

@@ -0,0 +1,14 @@
import json

import mlflow
import numpy as np

logged_model = '/mlruns/13/da5c6167bb45403fa35569849a1fbc13/artifacts/model'
loaded_model = mlflow.pyfunc.load_model(logged_model)

with open(f'{logged_model}/input_example.json') as f:
    data = json.load(f)

input_example = np.array([data['inputs'][0]], dtype=np.float64).reshape(-1, 2)

print(f'Prediction: {loaded_model.predict(input_example)}')

predict_s444356.py (new file)

@@ -0,0 +1,14 @@
import json

import mlflow
import numpy as np

logged_model = 'mlruns/1/4b83e774512444188fb587288818c298/artifacts/model'
loaded_model = mlflow.pyfunc.load_model(logged_model)

with open('input_example.json') as f:
    data = json.load(f)

input_example = np.array([data['inputs'][0]], dtype=np.float64).reshape(-1, 2)

print(f'Prediction: {loaded_model.predict(input_example)}')
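
The prediction Jenkinsfile above feeds this script by echoing the INPUT parameter into input_example.json; the same flow works locally:

    echo '{"inputs": [[0.51, 0.86], [0.79, 0.79]]}' > input_example.json
    python predict_s444356.py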

requirements.txt

@@ -4,4 +4,8 @@ torch==1.11.0
 numpy~=1.22.3
 matplotlib==3.5.2
 sacred==0.8.2
-pymongo==4.1.1
+pymongo==4.1.1
+mlflow==1.25.1
+dvc==2.10.2
+dvc-ssh==0.0.1a0
+paramiko==2.11.0

train_model.py

@@ -1,12 +1,11 @@
+import argparse
+from urllib.parse import urlparse
+import mlflow
+import numpy as np
 import pandas as pd
 import torch
-from sacred.observers import FileStorageObserver, MongoObserver
 from torch import nn
-from torch.utils.data import DataLoader, Dataset
-from sacred import Experiment
+from torch.utils.data import DataLoader
 from model import PlantsDataset, MLP, train, test
@@ -15,13 +14,30 @@ default_epochs = 5
 device = "cuda" if torch.cuda.is_available() else "cpu"

+# mlflow.set_tracking_uri("http://172.17.0.1:5000")
+mlflow.set_experiment("s444409")
+
+
+def setup_args():
+    args_parser = argparse.ArgumentParser(prefix_chars='-')
+    args_parser.add_argument('-b', '--batchSize', type=int, default=default_batch_size)
+    args_parser.add_argument('-e', '--epochs', type=int, default=default_epochs)
+    return args_parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = setup_args()
+    batch_size = args.batchSize
+    epochs = args.epochs
-def main(batch_size, epochs, _run):
     print(f"Using {device} device")

     plant_test = PlantsDataset('data/Plant_1_Generation_Data.csv.test')
     plant_train = PlantsDataset('data/Plant_1_Generation_Data.csv.train')
+    input_example = np.array([plant_test.x_train.numpy()[0]])

     train_dataloader = DataLoader(plant_train, batch_size=batch_size)
     test_dataloader = DataLoader(plant_test, batch_size=batch_size)
@@ -35,37 +51,37 @@ def main(batch_size, epochs, _run):
     loss_fn = nn.MSELoss()
     optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

-    for t in range(epochs):
-        print(f"Epoch {t + 1}\n-------------------------------")
-        train(train_dataloader, model, loss_fn, optimizer)
-        last_loss = test(test_dataloader, model, loss_fn)
-        _run.log_scalar('training.loss', last_loss, t)
-    print("Done!")
-    torch.save(model.state_dict(), './model_out')
-    print("Model saved in ./model_out file.")
+    with mlflow.start_run() as run:
+        print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
+        print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
+        mlflow.log_param("batch_size", batch_size)
+        mlflow.log_param("epochs", epochs)
+
+        for t in range(epochs):
+            print(f"Epoch {t + 1}\n-------------------------------")
+            train(train_dataloader, model, loss_fn, optimizer)
+            last_loss = test(test_dataloader, model, loss_fn)
+            mlflow.log_metric("rmse", last_loss)

-def setup_experiment():
-    ex = Experiment('Predict power output for a given time')
-    ex.observers.append(FileStorageObserver('sacred_runs'))
-    ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
-                                      db_name='sacred'))
-    return ex
-
-ex = setup_experiment()
-
-@ex.config
-def experiment_config():
-    batch_size = 64
-    epochs = 5
-
-@ex.automain
-def run(batch_size, epochs, _run):
-    main(batch_size, epochs, _run)
-    ex.add_artifact('model_out')
+        with torch.no_grad():
+            preds = model(plant_test.x_train)
+        signature = mlflow.models.signature.infer_signature(plant_test.x_train.numpy(), preds.numpy())
+        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
+
+        if tracking_url_type_store != "file":
+            mlflow.log_artifact('model.py')
+            mlflow.pytorch.log_model(
+                model,
+                "s444409",
+                registered_model_name="s444409",
+                signature=signature,
+                input_example=input_example,
+                code_paths=['model.py']
+            )
+        else:
+            mlflow.pytorch.log_model(model, "s444409", signature=signature, input_example=input_example,
+                                     code_paths=['model.py'])
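
Because the non-file tracking branch registers the model under the name s444409, consumers do not have to hard-code a run path the way predict_s444356-from-registry.py does; against the same tracking server, a registry URI works instead (the version number here is an assumed example):

    mlflow models serve -m models:/s444409/1 -p 5001    # serve the registered model over REST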