ium_444409/train_model.py
Marcin Kostrzewski d794e390aa
Some checks failed
s444409-training/pipeline/head There was a failure building this commit
Add Sacred FileStorageObserver
2022-05-06 21:39:18 +02:00

145 lines
3.5 KiB
Python

import argparse
import numpy as np
import pandas as pd
import torch
from sacred.observers import FileStorageObserver
from torch import nn
from torch.utils.data import DataLoader, Dataset
from sacred import Experiment
# Default hyper-parameter values for a training run.
default_batch_size = 64
default_epochs = 5

# Use the GPU whenever PyTorch reports a usable CUDA device, else the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def hour_to_int(text: str) -> float:
    """Encode a colon-separated clock string as a single number.

    The colons are removed and the remaining digits are parsed as one
    decimal number, e.g. ``"06:15:00"`` becomes ``61500.0``.

    NOTE: despite the name, the result is a ``float``.  The encoding is not
    linear in elapsed time (``"00:59"`` -> 59.0 but ``"01:00"`` -> 100.0).

    Raises:
        ValueError: if the text without colons is not a valid number.
    """
    return float(text.replace(':', ''))
def int_to_hour(num: int) -> str:
    """Return ``num`` rendered as its plain decimal string.

    NOTE: this does not restore the colons removed by ``hour_to_int``;
    ``61500`` comes back as ``"61500"``, not ``"06:15:00"``.
    """
    return str(num)
class PlantsDataset(Dataset):
    """Map-style dataset pairing an encoded time of day with a target value.

    The CSV is read once in ``__init__``:

    * column 0 must hold strings with at least two space-separated tokens;
      the second token is passed to ``hour_to_int`` (presumably a
      ``"<date> <HH:MM>"`` timestamp -- confirm against the data files);
    * column 3 is used as the regression target and must be numeric.
    """

    def __init__(self, file_name):
        """Load ``file_name`` and build the feature / target tensors."""
        df = pd.read_csv(file_name)

        # Keep only the token after the first space and encode it numerically.
        x_processed = np.array(
            [hour_to_int(stamp.split(' ')[1]) for stamp in df.iloc[:, 0]],
            dtype='float32',
        )
        y = df.iloc[:, 3].values

        self.x_train = torch.from_numpy(x_processed)
        self.y_train = torch.from_numpy(y)
        # The former ``self.x_train.type(torch.LongTensor)`` call was dropped:
        # ``Tensor.type`` returns a new tensor, so discarding it did nothing.

    def __len__(self):
        """Return the number of samples (rows of the target column)."""
        return len(self.y_train)

    def __getitem__(self, idx):
        """Return the ``(feature, target)`` pair at ``idx`` as float32 tensors."""
        return self.x_train[idx].float(), self.y_train[idx].float()
class MLP(nn.Module):
    """Small fully connected regressor: 1 -> 64 -> 32 -> 1 with ReLU between."""

    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(1, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        """Flatten ``x`` to ``(batch, -1)`` and run it through the layers.

        The first ``Linear`` layer expects one feature per sample, so a
        batch of scalars of shape ``(batch,)`` is reshaped to ``(batch, 1)``.
        The output has shape ``(batch, 1)``.
        """
        x = x.view(x.size(0), -1)
        return self.layers(x)
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch of ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` tensor batches; its ``dataset``
            attribute must support ``len``.
        model: the ``nn.Module`` being trained (switched to train mode).
        loss_fn: callable ``loss_fn(pred, target)`` returning a scalar tensor.
        optimizer: optimizer bound to ``model``'s parameters.

    Progress (current loss and number of samples seen) is printed every
    100 batches.
    """
    size = len(dataloader.dataset)

    # Send the data to wherever the model actually lives so inputs and weights
    # can never end up on different devices; only a parameter-less model falls
    # back to the module-level ``device``.
    first_param = next(model.parameters(), None)
    target_device = device if first_param is None else first_param.device

    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(target_device), y.to(target_device)

        # Compute prediction error
        pred = model(X)
        # A (batch,) target against a (batch, 1) prediction would be broadcast
        # to (batch, batch) by element-wise losses such as MSELoss, silently
        # optimising the wrong objective.  Align the shapes first.
        if y.shape != pred.shape and y.numel() == pred.numel():
            y = y.reshape(pred.shape)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_value, current = loss.item(), batch * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")
def test(dataloader, model, loss_fn):
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
test_loss /= num_batches
print(f"Avg loss (using {loss_fn}): {test_loss:>8f} \n")
return test_loss
def main(batch_size, epochs):
    """Train the MLP on the plant data and save its weights to ``./model_out``.

    Args:
        batch_size: number of samples per ``DataLoader`` batch.
        epochs: number of train + evaluate passes to run.
    """
    print(f"Using {device} device")

    plant_test = PlantsDataset('data/Plant_1_Generation_Data.csv.test')
    plant_train = PlantsDataset('data/Plant_1_Generation_Data.csv.train')

    train_dataloader = DataLoader(plant_train, batch_size=batch_size)
    test_dataloader = DataLoader(plant_test, batch_size=batch_size)

    # Show the first training batch as a sanity check of shapes and values.
    for data, labels in train_dataloader:
        print(data.shape, labels.shape)
        print(data, labels)
        break

    # The model must live on the same device the batches are moved to;
    # without this a CUDA host fails with a device-mismatch error.
    model = MLP().to(device)
    print(model)

    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    for t in range(epochs):
        print(f"Epoch {t + 1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer)
        test(test_dataloader, model, loss_fn)
    print("Done!")

    # Store CPU tensors so the checkpoint also loads on machines without CUDA.
    torch.save(model.cpu().state_dict(), './model_out')
    print("Model saved in ./model_out file.")
def setup_experiment():
    """Create the Sacred experiment and attach a file-storage observer.

    Returns:
        The ``Experiment`` instance; its runs are recorded by a
        ``FileStorageObserver`` created with the path ``'sacred_runs'``.
    """
    # Local name differs from the module-level ``ex`` to avoid shadowing it.
    experiment = Experiment('Predict power output for a given time')
    experiment.observers.append(FileStorageObserver('sacred_runs'))
    return experiment


# Module-level experiment object used by the Sacred decorators below.
ex = setup_experiment()
@ex.config
def experiment_config():
    """Default configuration values for the experiment."""
    batch_size = 64  # samples per DataLoader batch
    epochs = 5  # number of train + evaluate passes
@ex.automain
def run(batch_size, epochs):
    """Experiment entry point: train, then attach the saved weights to the run.

    ``batch_size`` and ``epochs`` are supplied by Sacred from the experiment
    configuration.
    """
    main(batch_size, epochs)
    # main() writes the weights to ./model_out; register that file with the run.
    ex.add_artifact('model_out')