mlflow done.
parent 1f36b724e1
commit fd1cf000dd

.gitignore (vendored)
@@ -5,3 +5,4 @@ metrics.tsv
*.pt
plot.png
my_runs
mlruns

Dockerfile
@@ -14,6 +14,7 @@ RUN pip3 install -r requirments.txt
RUN pip3 install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
RUN pip3 install sacred
RUN pip3 install pymongo
RUN pip3 install mlflow
# Let's create the /app directory in the container (if it doesn't exist) and switch into it (all subsequent RUN, CMD, ENTRYPOINT, COPY and ADD instructions will be executed in it)
WORKDIR /app

@@ -30,3 +31,5 @@ COPY ./evaluation.py ./
RUN chmod +x evaluation.py
COPY ./plot.py ./
RUN chmod +x plot.py
COPY ./train_mlflow.py ./
RUN chmod +x train_mlflow.py

Jenkinsfile
@@ -33,6 +33,8 @@ pipeline {
                img.inside {
                    sh 'chmod +x dlgssdpytorch.py'
                    sh 'python3 ./dlgssdpytorch.py $PARAMETRY'
                    sh 'chmod +x train_mlflow.py'
                    sh 'python3 ./train_mlflow.py'
                }
            }
        }

dlgssdpytorch.py
@@ -8,6 +8,7 @@ import argparse
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver


ex = Experiment("426206", interactive=False, save_git_info=False)
ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))
@@ -116,11 +117,11 @@ def train(lr, n_epochs, _run):
    # print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t")

    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': training_loss,
    }, 'model.pt')


@ex.automain
def my_main(lr, n_epochs, _run):

train_mlflow.py (new file, 133 lines)
@@ -0,0 +1,133 @@
import torch
import numpy as np

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, TensorDataset, DataLoader
import argparse
import mlflow
import mlflow.pytorch
from urllib.parse import urlparse


class LayerLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        # Instead of our custom parameters, we use a Linear layer with single input and single output
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        # Now it only takes a call to the layer to make predictions
        return self.linear(x)

parser = argparse.ArgumentParser(description='Model training program')
parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Learning rate (lr)", required=False)
parser.add_argument('-e', '--epochs', type=int, default=100, help="Number of epochs", required=False)
args = parser.parse_args()

if __name__ == "__main__":
    lr = args.lr
    n_epochs = args.epochs
    with mlflow.start_run():
        mlflow.log_param("lr", lr)
        mlflow.log_param("epochs", n_epochs)

        train_dataset = torch.load('train_dataset.pt')
        #val_dataset = torch.load('val_dataset.pt')

        train_loader = DataLoader(dataset=train_dataset)
        #val_loader = DataLoader(dataset=val_dataset)

        model = LayerLinearRegression()
        # Checks model's parameters
        #print(model.state_dict())

        loss_fn = nn.MSELoss(reduction='mean')
        optimizer = optim.SGD(model.parameters(), lr=lr)

        def make_train_step(model, loss_fn, optimizer):
            # Builds function that performs a step in the train loop
            def train_step(x, y):
                # Sets model to TRAIN mode
                model.train()
                # Makes predictions
                yhat = model(x)
                # Computes loss
                loss = loss_fn(y, yhat)
                # Computes gradients
                loss.backward()
                # Updates parameters and zeroes gradients
                optimizer.step()
                optimizer.zero_grad()
                # Returns the loss
                return loss.item()

            # Returns the function that will be called inside the train loop
            return train_step

        # Creates the train_step function for our model, loss function and optimizer
        train_step = make_train_step(model, loss_fn, optimizer)
        training_losses = []
        validation_losses = []
        #print(model.state_dict())
        # For each epoch...
        for epoch in range(n_epochs):

            losses = []
            # Uses loader to fetch one mini-batch for training
            for x_batch, y_batch in train_loader:
                # NOW, sends the mini-batch data to the device
                # so it matches location of the MODEL
                # x_batch = x_batch.to(device)
                # y_batch = y_batch.to(device)
                # One step of training
                loss = train_step(x_batch, y_batch)
                losses.append(loss)
            training_loss = np.mean(losses)
            training_losses.append(training_loss)

            mlflow.log_metric("MSE", training_loss)

            # After finishing training steps for all mini-batches,
            # it is time for evaluation!
            # Evaluation is no longer needed here, since it happens in evaluation.py. The per-epoch evaluation preview can still be enabled, though.
            # # We tell PyTorch to NOT use autograd...
            # # Do you remember why?
            # with torch.no_grad():
            #     val_losses = []
            #     # Uses loader to fetch one mini-batch for validation
            #     for x_val, y_val in val_loader:
            #         # Again, sends data to same device as model
            #         # x_val = x_val.to(device)
            #         # y_val = y_val.to(device)

            #         model.eval()
            #         # Makes predictions
            #         yhat = model(x_val)
            #         # Computes validation loss
            #         val_loss = loss_fn(y_val, yhat)
            #         val_losses.append(val_loss.item())
            #     validation_loss = np.mean(val_losses)
            #     validation_losses.append(validation_loss)

            # print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
            print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t")

        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': training_loss,
        }, 'model.pt')

        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

        # Model registry does not work with file store
        if tracking_url_type_store != "file":
            # Register the model
            # There are other ways to use the Model Registry, which depends on the use case,
            # please refer to the doc for more information:
            # https://mlflow.org/docs/latest/model-registry.html#api-workflow
            mlflow.pytorch.log_model(model, "model", registered_model_name="ElasticnetWineModel")
        else:
            mlflow.pytorch.log_model(model, "model")
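
With the default file store, the logged parameters and the per-epoch "MSE" metric land in the local mlruns directory (git-ignored above). A minimal read-back sketch through the tracking client, assuming the script ran from this working directory and landed in the default experiment (the experiment ID "0" is an assumption, not something this commit sets):

import mlflow
from mlflow.tracking import MlflowClient

# Point the client at the same local file store train_mlflow.py wrote to.
mlflow.set_tracking_uri("file:./mlruns")
client = MlflowClient()

# Most recent run of the default experiment (experiment ID "0" assumed).
run = client.search_runs(experiment_ids=["0"], order_by=["attributes.start_time DESC"])[0]
print(run.data.params)  # e.g. {'lr': '0.001', 'epochs': '100'}
print(client.get_metric_history(run.info.run_id, "MSE")[-1].value)  # last logged MSE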

train_mlflow/MLproject (new file, 13 lines)
@@ -0,0 +1,13 @@
name: 426206mlflow

#conda_env: conda.yaml # path to the conda.yaml file with the environment definition

#docker_env:
#  image: mlflow-docker-example-environment

entry_points:
  main:
    parameters:
      epochs: {type: int, default: 100}
      lr: {type: float, default: 0.001}
    command: "python3 train_mlflow.py -e {epochs} -l {lr}"
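
With this MLproject in place, the entry point can be launched from the CLI (mlflow run train_mlflow -P epochs=50 -P lr=0.01 --no-conda) or programmatically. A minimal sketch against the mlflow 1.x projects API of this era; the parameter values are illustrative only:

import mlflow

# Launch the "main" entry point of the directory containing the MLproject file;
# use_conda=False skips environment creation since no conda_env is declared.
submitted = mlflow.projects.run(
    uri="train_mlflow",
    entry_point="main",
    parameters={"epochs": 50, "lr": 0.01},
    use_conda=False,
)
print(submitted.run_id)  # ID of the tracked run created by the project launch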