Registered model.
Some checks failed
s426206-training/pipeline/head There was a failure building this commit

This commit is contained in:
Jan Nowak 2021-05-23 14:01:31 +02:00
parent bd6f5dac48
commit fe85610850
4 changed files with 121 additions and 18 deletions

View File

@ -35,10 +35,10 @@ pipeline {
sh 'chmod +x dlgssdpytorch.py' sh 'chmod +x dlgssdpytorch.py'
sh 'python3 ./dlgssdpytorch.py $PARAMETRY' sh 'python3 ./dlgssdpytorch.py $PARAMETRY'
sh 'chmod +x train_mlflow.py' sh 'chmod +x train_mlflow.py'
//sh 'chmod +x generate_MLmodel.py'
//sh 'python3 ./generate_MLmodel.py'
sh 'python3 ./train_mlflow.py -e 5' sh 'python3 ./train_mlflow.py -e 5'
//sh 'mlflow run --experiment-name s426206 .' //Uruchamiany projekt nie moze znajdowac sie w katalogach z wielkimi literami. //sh 'mlflow run --experiment-name s426206 .' //Uruchamiany projekt nie moze znajdowac sie w katalogach z wielkimi literami.
sh 'chmod +x generate_MLmodel.py'
sh 'python3 ./generate_MLmodel.py -e 5'
} }
} }
} }

View File

@ -4,7 +4,6 @@ name: 426206mlflow
docker_env: docker_env:
image: rokoch/ium:01 image: rokoch/ium:01
volumes: ["/etc/passwd:/etc/passwd"]
entry_points: entry_points:
main: main:

View File

@ -1,12 +1,13 @@
import torch import torch
import numpy as np import numpy as np
import torch.nn as nn import torch.nn as nn
import torch.optim as optim import torch.optim as optim
from torch.utils.data import Dataset, TensorDataset, DataLoader from torch.utils.data import Dataset, TensorDataset, DataLoader
import argparse
import mlflow import mlflow
import mlflow.pytorch import mlflow.pytorch
from urllib.parse import urlparse from urllib.parse import urlparse
from mlflow.models.signature import infer_signature from mlflow.models.signature import infer_signature
class LayerLinearRegression(nn.Module): class LayerLinearRegression(nn.Module):
@ -19,25 +20,125 @@ class LayerLinearRegression(nn.Module):
# Now it only takes a call to the layer to make predictions # Now it only takes a call to the layer to make predictions
return self.linear(x) return self.linear(x)
checkpoint = torch.load('model.pt') if __name__ == "__main__":
model = LayerLinearRegression() parser = argparse.ArgumentParser(description='Program do uczenia modelu')
#optimizer = optim.SGD(model.parameters(), lr=checkpoint['loss']) parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Współczynik uczenia (lr)", required=False)
parser.add_argument('-e', '--epochs', type=int, default=100, help="Liczba epok", required=False)
args = parser.parse_args()
model.load_state_dict(checkpoint['model_state_dict']) lr = args.lr
n_epochs = args.epochs
mlflow.set_experiment("s426206")
with mlflow.start_run():
mlflow.log_param("lr", lr)
mlflow.log_param("epochs", n_epochs)
train_dataset = torch.load('train_dataset.pt') train_dataset = torch.load('train_dataset.pt')
x_train = np.array(train_dataset)[:,0] #(Sales Sum row) #val_dataset = torch.load('val_dataset.pt')
input_example = np.reshape(x_train, (-1,1))
with torch.no_grad(): train_loader = DataLoader(dataset=train_dataset)
model.eval() #val_loader = DataLoader(dataset=val_dataset)
siganture = infer_signature(x_train, model(torch.tensor(np.reshape(x_train, (-1,1))).float()).numpy())
model = LayerLinearRegression()
# Checks model's parameters
#print(model.state_dict())
loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=lr)
def make_train_step(model, loss_fn, optimizer):
    """Build a function that performs a single step of the train loop.

    Args:
        model: Callable module; invoked as ``model(x)`` to get predictions.
        loss_fn: Criterion invoked as ``loss_fn(prediction, target)``.
        optimizer: Optimizer whose ``step()`` and ``zero_grad()`` are called
            after each backward pass.

    Returns:
        A function ``train_step(x, y)`` that updates the model on one batch
        and returns that batch's loss as a Python float.
    """

    def train_step(x, y):
        """Run forward pass, backward pass and parameter update for a batch."""
        # Sets model to TRAIN mode
        model.train()
        # Makes predictions
        yhat = model(x)
        # Computes loss. PyTorch criteria expect (input, target); the
        # prediction goes first so that a non-symmetric loss would also work.
        loss = loss_fn(yhat, y)
        # Computes gradients
        loss.backward()
        # Updates parameters, then clears the gradients so they do not
        # accumulate into the next step.
        optimizer.step()
        optimizer.zero_grad()
        # Returns the loss as a plain number
        return loss.item()

    # Returns the function that will be called inside the train loop
    return train_step
# Creates the train_step function for our model, loss function and optimizer
train_step = make_train_step(model, loss_fn, optimizer)
training_losses = []
validation_losses = []
#print(model.state_dict())
# For each epoch...
for epoch in range(n_epochs):
losses = []
# Uses loader to fetch one mini-batch for training
for x_batch, y_batch in train_loader:
# NOW, sends the mini-batch data to the device
# so it matches location of the MODEL
# x_batch = x_batch.to(device)
# y_batch = y_batch.to(device)
# One step of training
loss = train_step(x_batch, y_batch)
losses.append(loss)
training_loss = np.mean(losses)
training_losses.append(training_loss)
mlflow.log_metric("MSE", training_loss)
# After finishing training steps for all mini-batches,
# it is time for evaluation!
# Ewaluacja jest już tutaj nie potrzebna bo odbywa sie w evaluation.py. Można jednak włączyć podgląd ewaluacji dla poszczególnych epok.
# # We tell PyTorch to NOT use autograd...
# # Do you remember why?
# with torch.no_grad():
# val_losses = []
# # Uses loader to fetch one mini-batch for validation
# for x_val, y_val in val_loader:
# # Again, sends data to same device as model
# # x_val = x_val.to(device)
# # y_val = y_val.to(device)
# model.eval()
# # Makes predictions
# yhat = model(x_val)
# # Computes validation loss
# val_loss = loss_fn(y_val, yhat)
# val_losses.append(val_loss.item())
# validation_loss = np.mean(val_losses)
# validation_losses.append(validation_loss)
# print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t")
torch.save({
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': lr,
}, 'model.pt')
x_train = np.array(train_dataset)[:,0] #(Sales Sum row)
input_example = np.reshape(x_train, (-1,1))
with torch.no_grad():
model.eval()
siganture = infer_signature(x_train, model(torch.tensor(np.reshape(x_train, (-1,1))).float()).numpy())
#mlflow.set_experiment("s426206")
mlflow.set_tracking_uri("http://172.17.0.1:5000") mlflow.set_tracking_uri("http://172.17.0.1:5000")
tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
# print(tracking_url_type_store) # print(tracking_url_type_store)
# Model registry does not work with file store # Model registry does not work with file store
if tracking_url_type_store != "file": if tracking_url_type_store != "file":
mlflow.sklearn.log_model(model, "model", registered_model_name="s426206", signature=siganture, input_example=input_example) mlflow.pytorch.log_model(model, "model", registered_model_name="s426206", signature=siganture, input_example=input_example)
else: else:
mlflow.sklearn.log_model(model, "model", signature=siganture, input_example=input_example) mlflow.pytorch.log_model(model, "model", signature=siganture, input_example=input_example)
mlflow.pytorch.save_model(model, "my_model", signature=siganture, input_example=input_example)
#export MLFLOW_CONDA_HOME=/home/jan/miniconda3/
#mlflow models serve -m my_model/

View File

@ -139,3 +139,6 @@ if __name__ == "__main__":
else: else:
mlflow.pytorch.log_model(model, "model", signature=siganture, input_example=input_example) mlflow.pytorch.log_model(model, "model", signature=siganture, input_example=input_example)
mlflow.pytorch.save_model(model, "my_model", signature=siganture, input_example=input_example) mlflow.pytorch.save_model(model, "my_model", signature=siganture, input_example=input_example)
#export MLFLOW_CONDA_HOME=/home/jan/miniconda3/
#mlflow models serve -m my_model/