Added sacred.
Some checks failed
s426206-training/pipeline/head: There was a failure building this commit

Jan Nowak 2021-05-16 13:28:57 +02:00
parent 6a76b0713f
commit 06894754f8
3 changed files with 108 additions and 85 deletions

.gitignore

@@ -4,3 +4,4 @@ venv
 metrics.tsv
 *.pt
 plot.png
+my_runs
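Note: my_runs is the output directory of the FileStorageObserver added in the training script below; Sacred creates one numbered subdirectory per run there (with files such as config.json, metrics.json, run.json and cout.txt), so the directory is kept out of version control.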

Dockerfile

@@ -12,6 +12,7 @@ RUN chmod -R 777 /.kaggle
 COPY ./requirments.txt ./
 RUN pip3 install -r requirments.txt
 RUN pip3 install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip3 install sacred
 # Create the /app directory in the container (if it does not exist) and switch to it (all subsequent RUN, CMD, ENTRYPOINT, COPY and ADD commands will be executed there)
 WORKDIR /app
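Note: pip3 install sacred does not install pymongo, which Sacred's MongoObserver needs at runtime; unless the base image already provides it, an additional RUN pip3 install pymongo line (not part of this commit) would be needed for the Mongo observer configured in the training script below.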

dlgssdpytorch.py

@@ -5,21 +5,12 @@ import torch.nn as nn
 import torch.optim as optim
 from torch.utils.data import Dataset, TensorDataset, DataLoader
 import argparse
+from sacred import Experiment
+from sacred.observers import MongoObserver, FileStorageObserver
-parser = argparse.ArgumentParser(description='Program do uczenia modelu')
-parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Współczynik uczenia (lr)", required=False)
-parser.add_argument('-e', '--epochs', type=int, default=100, help="Liczba epok", required=False)
-args = parser.parse_args()
-lr = args.lr
-n_epochs = args.epochs
-train_dataset = torch.load('train_dataset.pt')
-#val_dataset = torch.load('val_dataset.pt')
-train_loader = DataLoader(dataset=train_dataset)
-#val_loader = DataLoader(dataset=val_dataset)
+ex = Experiment("426206", interactive=False, save_git_info=False)
+ex.observers.append(FileStorageObserver('my_runs'))
+ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))

 class LayerLinearRegression(nn.Module):
     def __init__(self):
         super().__init__()
@@ -29,81 +20,111 @@ class LayerLinearRegression(nn.Module):
     def forward(self, x):
         # Now it only takes a call to the layer to make predictions
         return self.linear(x)

-model = LayerLinearRegression()
-# Checks model's parameters
-#print(model.state_dict())
-
-loss_fn = nn.MSELoss(reduction='mean')
-optimizer = optim.SGD(model.parameters(), lr=lr)
-
-def make_train_step(model, loss_fn, optimizer):
-    # Builds function that performs a step in the train loop
-    def train_step(x, y):
-        # Sets model to TRAIN mode
-        model.train()
-        # Makes predictions
-        yhat = model(x)
-        # Computes loss
-        loss = loss_fn(y, yhat)
-        # Computes gradients
-        loss.backward()
-        # Updates parameters and zeroes gradients
-        optimizer.step()
-        optimizer.zero_grad()
-        # Returns the loss
-        return loss.item()
-    # Returns the function that will be called inside the train loop
-    return train_step
-
-# Creates the train_step function for our model, loss function and optimizer
-train_step = make_train_step(model, loss_fn, optimizer)
-training_losses = []
-validation_losses = []
-#print(model.state_dict())
-
-# For each epoch...
-for epoch in range(n_epochs):
-    losses = []
-    # Uses loader to fetch one mini-batch for training
-    for x_batch, y_batch in train_loader:
-        # NOW, sends the mini-batch data to the device
-        # so it matches location of the MODEL
-        # x_batch = x_batch.to(device)
-        # y_batch = y_batch.to(device)
-        # One step of training
-        loss = train_step(x_batch, y_batch)
-        losses.append(loss)
-    training_loss = np.mean(losses)
-    training_losses.append(training_loss)
-
-    # After finishing training steps for all mini-batches,
-    # it is time for evaluation!
-    # Evaluation is no longer needed here because it happens in evaluation.py. You can, however, enable a per-epoch evaluation preview.
-    # # We tell PyTorch to NOT use autograd...
-    # # Do you remember why?
-    # with torch.no_grad():
-    #     val_losses = []
-    #     # Uses loader to fetch one mini-batch for validation
-    #     for x_val, y_val in val_loader:
-    #         # Again, sends data to same device as model
-    #         # x_val = x_val.to(device)
-    #         # y_val = y_val.to(device)
-    #         model.eval()
-    #         # Makes predictions
-    #         yhat = model(x_val)
-    #         # Computes validation loss
-    #         val_loss = loss_fn(y_val, yhat)
-    #         val_losses.append(val_loss.item())
-    #     validation_loss = np.mean(val_losses)
-    #     validation_losses.append(validation_loss)
-
-    # print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
-    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t")
-
-torch.save({
-    'model_state_dict': model.state_dict(),
-    'optimizer_state_dict': optimizer.state_dict(),
-    'loss': lr,
-}, 'model.pt')
+# parser = argparse.ArgumentParser(description='Program do uczenia modelu')
+# parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Współczynik uczenia (lr)", required=False)
+# parser.add_argument('-e', '--epochs', type=int, default=100, help="Liczba epok", required=False)
+# args = parser.parse_args()
+#python3 dlgssdpytorch.py with lr=0.01 n_epochs=10
+@ex.config
+def my_config():
+    lr = 1e-3
+    n_epochs = 100
+
+@ex.capture
+def train(lr, n_epochs, _run):
+    train_dataset = torch.load('train_dataset.pt')
+    #val_dataset = torch.load('val_dataset.pt')
+    train_loader = DataLoader(dataset=train_dataset)
+    #val_loader = DataLoader(dataset=val_dataset)
+
+    model = LayerLinearRegression()
+    # Checks model's parameters
+    #print(model.state_dict())
+
+    loss_fn = nn.MSELoss(reduction='mean')
+    optimizer = optim.SGD(model.parameters(), lr=lr)
+
+    def make_train_step(model, loss_fn, optimizer):
+        # Builds function that performs a step in the train loop
+        def train_step(x, y):
+            # Sets model to TRAIN mode
+            model.train()
+            # Makes predictions
+            yhat = model(x)
+            # Computes loss
+            loss = loss_fn(y, yhat)
+            # Computes gradients
+            loss.backward()
+            # Updates parameters and zeroes gradients
+            optimizer.step()
+            optimizer.zero_grad()
+            # Returns the loss
+            return loss.item()
+        # Returns the function that will be called inside the train loop
+        return train_step
+
+    # Creates the train_step function for our model, loss function and optimizer
+    train_step = make_train_step(model, loss_fn, optimizer)
+    training_losses = []
+    validation_losses = []
+    #print(model.state_dict())
+
+    # For each epoch...
+    for epoch in range(n_epochs):
+
+        _run.log_scalar("Epoch", str(epoch))
+
+        losses = []
+        # Uses loader to fetch one mini-batch for training
+        for x_batch, y_batch in train_loader:
+            # NOW, sends the mini-batch data to the device
+            # so it matches location of the MODEL
+            # x_batch = x_batch.to(device)
+            # y_batch = y_batch.to(device)
+            # One step of training
+            loss = train_step(x_batch, y_batch)
+            losses.append(loss)
+        training_loss = np.mean(losses)
+        training_losses.append(training_loss)
+        _run.log_scalar("MSE", str(training_loss))
+
+        # After finishing training steps for all mini-batches,
+        # it is time for evaluation!
+        # Evaluation is no longer needed here because it happens in evaluation.py. You can, however, enable a per-epoch evaluation preview.
+        # # We tell PyTorch to NOT use autograd...
+        # # Do you remember why?
+        # with torch.no_grad():
+        #     val_losses = []
+        #     # Uses loader to fetch one mini-batch for validation
+        #     for x_val, y_val in val_loader:
+        #         # Again, sends data to same device as model
+        #         # x_val = x_val.to(device)
+        #         # y_val = y_val.to(device)
+        #         model.eval()
+        #         # Makes predictions
+        #         yhat = model(x_val)
+        #         # Computes validation loss
+        #         val_loss = loss_fn(y_val, yhat)
+        #         val_losses.append(val_loss.item())
+        #     validation_loss = np.mean(val_losses)
+        #     validation_losses.append(validation_loss)
+
+        # print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")
+        print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t")
+
+    torch.save({
+        'model_state_dict': model.state_dict(),
+        'optimizer_state_dict': optimizer.state_dict(),
+        'loss': lr,
+    }, 'model.pt')
+
+@ex.automain
+def my_main(lr, n_epochs, _run):
+    train(lr, n_epochs, _run)
+
+ex.run()
+ex.add_artifact('model.pt')
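For reference, a minimal sketch of how the Sacred pieces introduced in this commit (config, captured function, observers, log_scalar, add_artifact) fit together. The file name sacred_sketch.py, the experiment name, the fake loss and the model.txt artifact are illustrative assumptions, not part of this commit; only the FileStorageObserver is attached so the sketch runs without a MongoDB instance.

from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment("426206-sketch", save_git_info=False)
# Only the file observer is attached, so the sketch runs without MongoDB.
ex.observers.append(FileStorageObserver("my_runs"))

@ex.config
def my_config():
    lr = 1e-3      # config values are injected by name into captured functions
    n_epochs = 3

@ex.capture
def train(lr, n_epochs, _run):
    for epoch in range(n_epochs):
        fake_loss = lr / (epoch + 1)       # stand-in for the real training MSE
        _run.log_scalar("MSE", fake_loss)  # recorded by every attached observer
    with open("model.txt", "w") as f:      # stand-in for torch.save(..., 'model.pt')
        f.write("weights")
    _run.add_artifact("model.txt")         # attach the saved file to this run

@ex.automain   # parses "with lr=... n_epochs=..." overrides and starts the run
def my_main():
    train()

It is run the same way as the script above, for example: python3 sacred_sketch.py with lr=0.01 n_epochs=10. With @ex.automain the run starts automatically, so no explicit ex.run() call is needed, and the logged scalars and the artifact end up in a numbered subdirectory of my_runs.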