mlflow
All checks were successful
s444501-training/pipeline/head This commit looks good
s444501-evaluation/pipeline/head This commit looks good

This commit is contained in:
s444501 2022-05-13 00:59:11 +02:00
parent f4d0ad9c06
commit 8eca92baef
4 changed files with 57 additions and 37 deletions

View File

@ -9,8 +9,7 @@ RUN pip3 install matplotlib
RUN pip3 install sklearn RUN pip3 install sklearn
RUN pip3 install kaggle RUN pip3 install kaggle
RUN pip3 install torch RUN pip3 install torch
RUN pip3 install sacred RUN pip3 install mlflow
RUN pip3 install pymongo
RUN mkdir /.kaggle && chmod o+w /.kaggle RUN mkdir /.kaggle && chmod o+w /.kaggle

12
MLproject Normal file
View File

@ -0,0 +1,12 @@
# MLflow project definition; run an entry point with `mlflow run . -e <entry_point>`.
name: s444501

# Entry points are executed inside this Docker image.
docker_env:
  image: zadanie

entry_points:
  # Training: the epoch count is forwarded to the script as its first argument.
  main:
    parameters:
      # MLproject parameter types have no integer variant, hence `float`;
      # the training script converts the value with int().
      epochs: {type: float, default: 100}
    command: "python biblioteki_ml.py {epochs}"
  # Evaluation: takes no parameters.
  eval:
    command: "python eval.py"

View File

@ -1,11 +1,23 @@
import sys import sys
from urllib.parse import urlparse
import numpy as np
import mlflow
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from sacred.observers import FileStorageObserver, MongoObserver
from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import LabelEncoder
import pandas as pd import pandas as pd
from sacred import Experiment
# MLFlow 1 - select the experiment under which runs are recorded
# (mlflow.set_experiment creates it if it does not exist yet).
mlflow.set_experiment("s444501")
# Command-line parameters: the epoch count is the optional first argument.
try:
    epochs = int(sys.argv[1])
except (IndexError, ValueError):
    # IndexError: no argument was given; ValueError: it is not an integer.
    # Catching only these two keeps KeyboardInterrupt / SystemExit from
    # being silently swallowed, which a bare `except:` would do.
    print('No epoch number passed. Defaulting to 100')
    epochs = 100
# Model # Model
@ -23,32 +35,7 @@ class Model(nn.Module):
return x return x
# Sacred def train_main(epochs, run):
ex = Experiment()
ex.observers.append(FileStorageObserver('my_runs'))
# Parametry treningu -> my_runs/X/config.json
# Plik z modelem jako artefakt -> my_runs/X/model.pkl
# Kod źródłowy -> my_runs/_sources/biblioteki_ml_XXXXXXXXXXX.py
# Wyniki (ostateczny loss) -> my_runs/X/metrics.json
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
db_name='sacred'))
@ex.config
def my_config():
epochs = 100
@ex.automain
def train_main(epochs, _run):
# Parametry z konsoli
# try:
# epochs = int(sys.argv[1])
# except:
# print('No epoch number passed. Defaulting to 100')
# epochs = 100
# Ładowanie danych # Ładowanie danych
train_set = pd.read_csv('d_train.csv', encoding='latin-1') train_set = pd.read_csv('d_train.csv', encoding='latin-1')
train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']] train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']]
@ -103,7 +90,6 @@ def train_main(epochs, _run):
optimizer.zero_grad() optimizer.zero_grad()
loss.backward() loss.backward()
optimizer.step() optimizer.step()
_run.log_scalar("training.final_loss", losses[-1].item()) # Ostateczny loss
# Testy # Testy
@ -115,13 +101,36 @@ def train_main(epochs, _run):
df = pd.DataFrame({'Testing Y': y_test, 'Predicted Y': preds}) df = pd.DataFrame({'Testing Y': y_test, 'Predicted Y': preds})
df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Testing Y'], df['Predicted Y'])] df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Testing Y'], df['Predicted Y'])]
print(f"{df['Correct'].sum() / len(df)} percent of predictions correct") correct = df['Correct'].sum() / len(df)
print(f"{correct} percent of predictions correct")
# Logi
mlflow.log_param("epochs", epochs)
mlflow.log_metric("final_loss", losses[-1].item())
mlflow.log_metric("accuracy", correct)
signature = mlflow.models.signature.infer_signature(X_train.numpy(), np.array(preds))
tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
if tracking_url_type_store != "file":
mlflow.pytorch.log_model(model,
's444501',
registered_model_name='s444501',
signature=signature,
input_example=X_test.numpy())
else:
mlflow.pytorch.log_model(model,
's444501',
signature=signature,
input_example=X_test.numpy())
# Zapis do pliku # Zapis do pliku
df.to_csv('neural_network_prediction_results.csv', index=False) df.to_csv('neural_network_prediction_results.csv', index=False)
torch.save(model, "model.pkl") torch.save(model, "model.pkl")
# Zapis Sacred
ex.add_artifact("model.pkl") with mlflow.start_run() as run:
ex.add_artifact("neural_network_prediction_results.csv") print(f"MLflow run experiment_id: {run.info.experiment_id}")
print(f"MLflow run artifact_uri: {run.info.artifact_uri}")
train_main(epochs, run)

View File

@ -24,14 +24,14 @@ pipeline {
stage('Train model') { stage('Train model') {
steps { steps {
withEnv(["EPOCH=${params.EPOCH}"]) { withEnv(["EPOCH=${params.EPOCH}"]) {
sh 'python biblioteki_ml.py with "epochs=$EPOCH"' sh 'python biblioteki_ml.py $EPOCH'
} }
} }
} }
stage('Archive artifacts') { stage('Archive artifacts') {
steps { steps {
archiveArtifacts artifacts: 'model.pkl, neural_network_prediction_results.csv' archiveArtifacts artifacts: 'model.pkl, neural_network_prediction_results.csv'
archiveArtifacts artifacts: 'my_runs/**' archiveArtifacts artifacts: 'mlruns/**'
} }
} }
stage ('Model - evaluation') { stage ('Model - evaluation') {