"""Train a small MLP regression model on solar-plant generation data.

The model predicts a generation value (4th CSV column) from the time of
day, extracted from the timestamp in the 1st CSV column.
"""
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

# NOTE: `test` is deliberately omitted so `from train_model import *` does not
# export a name that test runners mistake for a test case.
__all__ = ["hour_to_int", "int_to_hour", "PlantsDataset", "MLP", "train", "main"]


def hour_to_int(text: str) -> float:
    """Convert an 'HH:MM' time string to a float, e.g. '06:30' -> 630.0."""
    return float(text.replace(':', ''))


def int_to_hour(num: int) -> str:
    """Inverse of hour_to_int: convert e.g. 630 back to '06:30'.

    FIX: the original returned str(num) ('630'), which neither restores the
    'HH:MM' form found in the data nor matches the function's name.
    """
    num = int(num)
    return f"{num // 100:02d}:{num % 100:02d}"


class PlantsDataset(Dataset):
    """Dataset mapping time-of-day (float32) -> generation value from a CSV."""

    def __init__(self, file_name):
        df = pd.read_csv(file_name)

        # Column 0 holds 'DD-MM-YYYY HH:MM' timestamps; keep the time part.
        # NOTE(review): assumes exactly one space separates date and time --
        # confirm against the real Plant_1_Generation_Data files.
        times = [row[0].split(' ')[1] for row in df.iloc[:, 0:1].values]
        targets = df.iloc[:, 3].values

        x_processed = np.array([hour_to_int(t) for t in times], dtype='float32')

        self.x_train = torch.from_numpy(x_processed)
        self.y_train = torch.from_numpy(targets)
        # FIX: removed the original no-op `self.x_train.type(torch.LongTensor)`
        # -- Tensor.type() returns a NEW tensor, and the result was discarded.

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        # Cast to float32 so inputs/targets match the model's dtype.
        return self.x_train[idx].float(), self.y_train[idx].float()


class MLP(nn.Module):
    """Small fully-connected regression network: 1 input -> 1 output."""

    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(1, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        # Flatten to (batch, features); features == 1 for this dataset.
        x = x.view(x.size(0), -1)
        return self.layers(x)


def _resolve_device(device):
    """Fall back to the original auto-detection when no device is given."""
    if device is None:
        return "cuda" if torch.cuda.is_available() else "cpu"
    return device


def train(dataloader, model, loss_fn, optimizer, device=None):
    """Run one training epoch, printing the loss every 100 batches.

    `device` is optional for backward compatibility; omitted, it is
    auto-detected exactly as the original module-level global was.
    """
    device = _resolve_device(device)
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error.
        # FIX: the model outputs shape (batch, 1) while y is (batch,); without
        # unsqueeze, MSELoss broadcasts to (batch, batch) and silently computes
        # the wrong loss (PyTorch warns about exactly this mismatch).
        pred = model(X)
        loss = loss_fn(pred, y.unsqueeze(1))

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss_val, current = loss.item(), batch * len(X)
            print(f"loss: {loss_val:>7f} [{current:>5d}/{size:>5d}]")


def test(dataloader, model, loss_fn, device=None):
    """Report the average loss over the test set (no gradient tracking)."""
    device = _resolve_device(device)
    num_batches = len(dataloader)
    model.eval()
    test_loss = 0.0  # FIX: dropped the unused `correct` counter
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            # Same (batch, 1) vs (batch,) shape fix as in train().
            test_loss += loss_fn(pred, y.unsqueeze(1)).item()
    test_loss /= num_batches
    print(f"Avg loss: {test_loss:>8f} \n")


def main():
    """Load the plant datasets, then train and evaluate the MLP."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using {device} device")

    batch_size = 64

    plant_test = PlantsDataset('data/Plant_1_Generation_Data.csv.test')
    plant_train = PlantsDataset('data/Plant_1_Generation_Data.csv.train')

    train_dataloader = DataLoader(plant_train, batch_size=batch_size)
    test_dataloader = DataLoader(plant_test, batch_size=batch_size)

    # Peek at one batch to sanity-check tensor shapes before training.
    data, labels = next(iter(train_dataloader))
    print(data.shape, labels.shape)
    print(data, labels)

    # FIX: move the model to the selected device; the original left it on the
    # CPU, which crashes under CUDA because train() moves batches to the GPU.
    model = MLP().to(device)
    print(model)

    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    epochs = 5
    for t in range(epochs):
        print(f"Epoch {t + 1}\n-------------------------------")
        train(train_dataloader, model, loss_fn, optimizer, device)
        test(test_dataloader, model, loss_fn, device)
    print("Done!")


# FIX: guard the training run so importing this module has no side effects
# (the original trained at import time).
if __name__ == "__main__":
    main()