Added sacred.
Some checks failed
s426206-training/pipeline/head There was a failure building this commit
This commit is contained in:
parent 6a76b0713f
commit 06894754f8
.gitignore (vendored): 1 line changed
@@ -4,3 +4,4 @@ venv
metrics.tsv
*.pt
plot.png
my_runs
Dockerfile
@@ -12,6 +12,7 @@ RUN chmod -R 777 /.kaggle
COPY ./requirments.txt ./
RUN pip3 install -r requirments.txt
RUN pip3 install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html
RUN pip3 install sacred

# Create the /app directory in the container (if it does not exist) and switch to it (all subsequent RUN, CMD, ENTRYPOINT, COPY and ADD instructions are executed there)
WORKDIR /app
dlgssdpytorch.py
@@ -5,21 +5,12 @@ import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, TensorDataset, DataLoader
import argparse
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver

parser = argparse.ArgumentParser(description='Model training program')
parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Learning rate (lr)", required=False)
parser.add_argument('-e', '--epochs', type=int, default=100, help="Number of epochs", required=False)
args = parser.parse_args()

lr = args.lr
n_epochs = args.epochs

train_dataset = torch.load('train_dataset.pt')
#val_dataset = torch.load('val_dataset.pt')

train_loader = DataLoader(dataset=train_dataset)
#val_loader = DataLoader(dataset=val_dataset)

ex = Experiment("426206", interactive=False, save_git_info=False)
ex.observers.append(FileStorageObserver('my_runs'))
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))
class LayerLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
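The two observers attached here are what the rest of the commit is built around: the FileStorageObserver records each run's config, metrics and artifacts under ./my_runs (which is why that directory was added to .gitignore above), while the MongoObserver writes the same data to the MongoDB at 172.17.0.1:27017 and therefore needs that instance to be reachable from wherever the script runs. A minimal sketch (not part of the commit; the MONGO_URL environment variable is an assumption) of attaching the Mongo observer only when a connection string is provided:

# Sketch only: optional MongoObserver attachment driven by an environment
# variable. MONGO_URL is hypothetical; the committed script hard-codes the URL.
import os
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver

ex = Experiment("426206", save_git_info=False)
ex.observers.append(FileStorageObserver("my_runs"))      # local run records in ./my_runs

mongo_url = os.environ.get("MONGO_URL")
if mongo_url:
    ex.observers.append(MongoObserver(url=mongo_url, db_name="sacred"))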
@@ -30,6 +21,25 @@ class LayerLinearRegression(nn.Module):
        # Now it only takes a call to the layer to make predictions
        return self.linear(x)

# parser = argparse.ArgumentParser(description='Model training program')
# parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Learning rate (lr)", required=False)
# parser.add_argument('-e', '--epochs', type=int, default=100, help="Number of epochs", required=False)
# args = parser.parse_args()
#python3 dlgssdpytorch.py with lr=0.01 n_epochs=10

@ex.config
def my_config():
    lr = 1e-3
    n_epochs = 100

@ex.capture
def train(lr, n_epochs, _run):
    train_dataset = torch.load('train_dataset.pt')
    #val_dataset = torch.load('val_dataset.pt')

    train_loader = DataLoader(dataset=train_dataset)
    #val_loader = DataLoader(dataset=val_dataset)

    model = LayerLinearRegression()
    # Checks model's parameters
    #print(model.state_dict())
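For context (an illustration, not part of the diff): values defined in an @ex.config function become the run configuration, and Sacred injects them by name into @ex.capture functions and into the main function, so train() could also be called without passing lr and n_epochs explicitly. A self-contained sketch of that injection, mirroring the structure above:

# Sketch of Sacred's config injection; names mirror the diff but the script is standalone.
from sacred import Experiment

ex = Experiment("capture_demo", save_git_info=False)

@ex.config
def my_config():
    lr = 1e-3
    n_epochs = 100

@ex.capture
def train(lr, n_epochs):
    print(f"training with lr={lr} for {n_epochs} epochs")

@ex.automain
def main():
    train()             # lr and n_epochs are filled in from the config
    train(lr=0.01)      # explicitly passed arguments take precedence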
@@ -64,6 +74,8 @@ validation_losses = []
#print(model.state_dict())
# For each epoch...
for epoch in range(n_epochs):

    _run.log_scalar("Epoch", str(epoch))
    losses = []
    # Uses loader to fetch one mini-batch for training
    for x_batch, y_batch in train_loader:
@@ -77,6 +89,8 @@ for epoch in range(n_epochs):
    training_loss = np.mean(losses)
    training_losses.append(training_loss)

    _run.log_scalar("MSE", str(training_loss))

    # After finishing training steps for all mini-batches,
    # it is time for evaluation!
    # Evaluation is no longer needed here, because it is done in evaluation.py. A per-epoch evaluation preview can still be enabled, though.
@@ -107,3 +121,10 @@ torch.save({
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': lr,
}, 'model.pt')

@ex.automain
def my_main(lr, n_epochs, _run):
    train(lr, n_epochs, _run)

ex.run()
ex.add_artifact('model.pt')
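As a usage note (not part of the commit): with @ex.automain the experiment is run automatically when the script itself is executed, and config values are overridden on the command line exactly as the in-file comment shows (python3 dlgssdpytorch.py with lr=0.01 n_epochs=10); logged scalars and artifacts then land in ./my_runs and in the sacred database. A standalone sketch of that pattern, using a dummy metric and a placeholder artifact file:

# Standalone sketch (not the committed script): CLI-overridable config,
# scalar logging and artifact upload in one minimal Sacred experiment.
from sacred import Experiment
from sacred.observers import FileStorageObserver

ex = Experiment("sketch", save_git_info=False)
ex.observers.append(FileStorageObserver("my_runs"))

@ex.config
def cfg():
    lr = 1e-3           # override with: python sketch.py with lr=0.01 n_epochs=10
    n_epochs = 100

@ex.automain
def main(lr, n_epochs, _run):
    for epoch in range(n_epochs):
        _run.log_scalar("MSE", 1.0 / (epoch + 1))    # dummy training metric
    with open("model.pt", "wb") as f:                # placeholder checkpoint for the sketch
        f.write(b"")
    _run.add_artifact("model.pt")                    # attach the file to this run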