mlflow done.
All checks were successful
s426206-evaluation/pipeline/head This commit looks good
s426206-training/pipeline/head This commit looks good

This commit is contained in:
Jan Nowak 2021-05-16 18:53:33 +02:00
parent 1f36b724e1
commit fd1cf000dd
6 changed files with 158 additions and 5 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ metrics.tsv
*.pt *.pt
plot.png plot.png
my_runs my_runs
mlruns

View File

@ -14,6 +14,7 @@ RUN pip3 install -r requirments.txt
RUN pip3 install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html RUN pip3 install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
RUN pip3 install sacred RUN pip3 install sacred
RUN pip3 install pymongo RUN pip3 install pymongo
RUN pip3 install mlflow
# Stwórzmy w kontenerze (jeśli nie istnieje) katalog /app i przejdźmy do niego (wszystkie kolejne polecenia RUN, CMD, ENTRYPOINT, COPY i ADD będą w nim wykonywane) # Stwórzmy w kontenerze (jeśli nie istnieje) katalog /app i przejdźmy do niego (wszystkie kolejne polecenia RUN, CMD, ENTRYPOINT, COPY i ADD będą w nim wykonywane)
WORKDIR /app WORKDIR /app
@ -30,3 +31,5 @@ COPY ./evaluation.py ./
RUN chmod +x evaluation.py RUN chmod +x evaluation.py
COPY ./plot.py ./ COPY ./plot.py ./
RUN chmod +x plot.py RUN chmod +x plot.py
COPY ./train_mlflow.py ./
RUN chmod +x train_mlflow.py

View File

@ -33,6 +33,8 @@ pipeline {
img.inside { img.inside {
sh 'chmod +x dlgssdpytorch.py' sh 'chmod +x dlgssdpytorch.py'
sh 'python3 ./dlgssdpytorch.py $PARAMETRY' sh 'python3 ./dlgssdpytorch.py $PARAMETRY'
sh 'chmod +x train_mlflow.py'
sh 'python3 ./train_mlflow.py'
} }
} }
} }

View File

@ -8,6 +8,7 @@ import argparse
from sacred import Experiment from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver from sacred.observers import MongoObserver, FileStorageObserver
ex = Experiment("426206", interactive=False, save_git_info=False) ex = Experiment("426206", interactive=False, save_git_info=False)
ex.observers.append(FileStorageObserver('my_runs')) ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred')) ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))
@ -116,11 +117,11 @@ def train(lr, n_epochs, _run):
# print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}") # print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t") print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t")
torch.save({ torch.save({
'model_state_dict': model.state_dict(), 'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(), 'optimizer_state_dict': optimizer.state_dict(),
'loss': lr, 'loss': lr,
}, 'model.pt') }, 'model.pt')
@ex.automain @ex.automain
def my_main(lr, n_epochs, _run): def my_main(lr, n_epochs, _run):

133
train_mlflow.py Normal file
View File

@ -0,0 +1,133 @@
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, TensorDataset, DataLoader
import argparse
import mlflow
import mlflow.pytorch
from urllib.parse import urlparse
class LayerLinearRegression(nn.Module):
    """Linear regression model backed by a single ``nn.Linear`` layer.

    Args:
        in_features: Number of input features per sample. Defaults to 1,
            which is the configuration the training script uses.
        out_features: Number of predicted values per sample. Defaults to 1.
    """

    def __init__(self, in_features=1, out_features=1):
        super().__init__()
        # Instead of hand-written weight/bias parameters, rely on a Linear
        # layer; with the defaults it has a single input and a single output.
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the layer's prediction for the batch ``x``."""
        # A call to the layer is all that is needed to make predictions.
        return self.linear(x)
# NOTE(review): this registry name looks carried over from the MLflow
# scikit-learn tutorial rather than chosen for this model. It is kept
# unchanged so that any existing registry entry keeps matching; confirm and
# rename deliberately if that is not intended.
REGISTERED_MODEL_NAME = "ElasticnetWineModel"


def build_parser():
    """Create the command-line parser for the training script.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--lr`` (float, default
        1e-3) and ``--epochs`` (int, default 100).
    """
    parser = argparse.ArgumentParser(description='Program do uczenia modelu')
    parser.add_argument('-l', '--lr', type=float, default=1e-3,
                        help="Współczynik uczenia (lr)", required=False)
    parser.add_argument('-e', '--epochs', type=int, default=100,
                        help="Liczba epok", required=False)
    return parser


def make_train_step(model, loss_fn, optimizer):
    """Build a function that performs one optimisation step.

    Args:
        model: Module being trained.
        loss_fn: Callable invoked as ``loss_fn(prediction, target)``.
        optimizer: Optimizer holding the parameters of ``model``.

    Returns:
        A ``train_step(x, y)`` function that updates the model on one
        mini-batch and returns the loss of that batch as a Python float.
    """
    def train_step(x, y):
        # Put the model in TRAIN mode before the forward pass.
        model.train()
        yhat = model(x)
        # PyTorch losses are called as (input, target); prediction goes first.
        loss = loss_fn(yhat, y)
        loss.backward()
        # Update the parameters, then clear gradients for the next batch.
        optimizer.step()
        optimizer.zero_grad()
        return loss.item()

    return train_step


def run_epoch(train_step, train_loader):
    """Run ``train_step`` over every mini-batch and return the mean loss.

    Args:
        train_step: Function produced by ``make_train_step``.
        train_loader: Iterable yielding ``(x_batch, y_batch)`` pairs.

    Returns:
        Mean of the per-batch losses as a float, or NaN when the loader
        yields no batches.
    """
    losses = [train_step(x_batch, y_batch) for x_batch, y_batch in train_loader]
    if not losses:
        return float("nan")
    return float(np.mean(losses))


def main(argv=None):
    """Train the linear model, track the run in MLflow and save a checkpoint.

    Reads ``train_dataset.pt`` from the working directory, logs the
    hyper-parameters and the per-epoch training MSE to MLflow, writes the
    checkpoint to ``model.pt`` and logs the model as an MLflow artifact.

    Args:
        argv: Optional list of command-line arguments; ``None`` makes
            argparse read ``sys.argv``.
    """
    # Parse inside main so that importing this module has no side effects.
    args = build_parser().parse_args(argv)
    lr = args.lr
    n_epochs = args.epochs

    with mlflow.start_run():
        mlflow.log_param("lr", lr)
        mlflow.log_param("epochs", n_epochs)

        train_dataset = torch.load('train_dataset.pt')
        train_loader = DataLoader(dataset=train_dataset)

        model = LayerLinearRegression()
        loss_fn = nn.MSELoss(reduction='mean')
        optimizer = optim.SGD(model.parameters(), lr=lr)
        train_step = make_train_step(model, loss_fn, optimizer)

        # Stays None when n_epochs == 0, i.e. no loss was ever computed.
        training_loss = None
        for epoch in range(n_epochs):
            training_loss = run_epoch(train_step, train_loader)
            # Pass the epoch as the step so the metric history is ordered.
            mlflow.log_metric("MSE", training_loss, step=epoch)
            # Per-epoch validation is not done here because evaluation is
            # carried out separately in evaluation.py.
            print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t")

        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Store the final training loss (previously the learning rate
            # was written under this key by mistake).
            'loss': training_loss,
        }, 'model.pt')

        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        # The Model Registry does not work with a file store, so the model
        # is only registered when a different tracking backend is in use.
        # The model is an nn.Module, hence the PyTorch flavour.
        if tracking_url_type_store != "file":
            mlflow.pytorch.log_model(
                model, "model", registered_model_name=REGISTERED_MODEL_NAME)
        else:
            mlflow.pytorch.log_model(model, "model")


if __name__ == "__main__":
    main()

13
train_mlflow/MLproject Normal file
View File

@ -0,0 +1,13 @@
# MLflow project definition for the train_mlflow.py training script.
name: 426206mlflow

# Optional environment definitions, currently disabled:
#conda_env: conda.yaml  # path to the conda.yaml file that defines the environment
#docker_env:
#  image: mlflow-docker-example-environment

entry_points:
  main:
    # Parameters are substituted into the command below by name.
    parameters:
      epochs: {type: int, default: 100}
      lr: {type: float, default: 0.001}
    command: "python3 train_mlflow.py -e {epochs} -l {lr}"