mlflow
All checks were successful
s444501-training/pipeline/head This commit looks good
s444501-evaluation/pipeline/head This commit looks good

This commit is contained in:
s444501 2022-05-13 00:59:11 +02:00
parent f4d0ad9c06
commit 8eca92baef
4 changed files with 57 additions and 37 deletions

View File

@@ -9,8 +9,7 @@ RUN pip3 install matplotlib
RUN pip3 install sklearn
RUN pip3 install kaggle
RUN pip3 install torch
RUN pip3 install sacred
RUN pip3 install pymongo
RUN pip3 install mlflow
RUN mkdir /.kaggle && chmod o+w /.kaggle

12
MLproject Normal file
View File

@@ -0,0 +1,12 @@
# MLflow project definition: project name, Docker execution environment,
# and the entry points that `mlflow run` can invoke.
name: s444501

# Entry points are executed inside this Docker image.
docker_env:
  image: zadanie

entry_points:
  # Training entry point; `epochs` is substituted into the command line
  # as the first positional argument of biblioteki_ml.py.
  main:
    parameters:
      epochs: {type: float, default: 100}
    command: "python biblioteki_ml.py {epochs}"
  # Evaluation entry point; takes no parameters.
  eval:
    command: "python eval.py"

View File

@@ -1,11 +1,23 @@
import sys
from urllib.parse import urlparse
import numpy as np
import mlflow
import torch
import torch.nn as nn
import torch.nn.functional as F
from sacred.observers import FileStorageObserver, MongoObserver
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from sacred import Experiment
# MLFlow 1
mlflow.set_experiment("s444501")
# Command-line parameters: the first positional argument is the epoch count.
try:
    epochs = int(sys.argv[1])
except (IndexError, ValueError):
    # IndexError: no argument was given; ValueError: it is not an integer.
    # Only these two are caught so that KeyboardInterrupt/SystemExit still
    # propagate instead of being silently turned into the default.
    print('No epoch number passed. Defaulting to 100')
    epochs = 100
# Model
@@ -23,32 +35,7 @@ class Model(nn.Module):
return x
# Sacred
ex = Experiment()
ex.observers.append(FileStorageObserver('my_runs'))
# Parametry treningu -> my_runs/X/config.json
# Plik z modelem jako artefakt -> my_runs/X/model.pkl
# Kod źródłowy -> my_runs/_sources/biblioteki_ml_XXXXXXXXXXX.py
# Wyniki (ostateczny loss) -> my_runs/X/metrics.json
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
db_name='sacred'))
@ex.config
def my_config():
epochs = 100
@ex.automain
def train_main(epochs, _run):
# Parametry z konsoli
# try:
# epochs = int(sys.argv[1])
# except:
# print('No epoch number passed. Defaulting to 100')
# epochs = 100
def train_main(epochs, run):
# Ładowanie danych
train_set = pd.read_csv('d_train.csv', encoding='latin-1')
train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']]
@@ -103,7 +90,6 @@ def train_main(epochs, _run):
optimizer.zero_grad()
loss.backward()
optimizer.step()
_run.log_scalar("training.final_loss", losses[-1].item()) # Ostateczny loss
# Testy
@@ -115,13 +101,36 @@ def train_main(epochs, _run):
df = pd.DataFrame({'Testing Y': y_test, 'Predicted Y': preds})
df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Testing Y'], df['Predicted Y'])]
print(f"{df['Correct'].sum() / len(df)} percent of predictions correct")
correct = df['Correct'].sum() / len(df)
print(f"{correct} percent of predictions correct")
# Logi
mlflow.log_param("epochs", epochs)
mlflow.log_metric("final_loss", losses[-1].item())
mlflow.log_metric("accuracy", correct)
signature = mlflow.models.signature.infer_signature(X_train.numpy(), np.array(preds))
tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
if tracking_url_type_store != "file":
mlflow.pytorch.log_model(model,
's444501',
registered_model_name='s444501',
signature=signature,
input_example=X_test.numpy())
else:
mlflow.pytorch.log_model(model,
's444501',
signature=signature,
input_example=X_test.numpy())
# Zapis do pliku
df.to_csv('neural_network_prediction_results.csv', index=False)
torch.save(model, "model.pkl")
# Zapis Sacred
ex.add_artifact("model.pkl")
ex.add_artifact("neural_network_prediction_results.csv")
# Script driver: open an MLflow run, report where it is being recorded,
# then train inside the run context.
with mlflow.start_run() as run:
    run_info = run.info
    print(f"MLflow run experiment_id: {run_info.experiment_id}")
    print(f"MLflow run artifact_uri: {run_info.artifact_uri}")
    train_main(epochs, run)

View File

@@ -24,14 +24,14 @@ pipeline {
stage('Train model') {
steps {
withEnv(["EPOCH=${params.EPOCH}"]) {
sh 'python biblioteki_ml.py with "epochs=$EPOCH"'
sh 'python biblioteki_ml.py $EPOCH'
}
}
}
stage('Archive artifacts') {
steps {
archiveArtifacts artifacts: 'model.pkl, neural_network_prediction_results.csv'
archiveArtifacts artifacts: 'my_runs/**'
archiveArtifacts artifacts: 'mlruns/**'
}
}
stage ('Model - evaluation') {