ium_478839/ml_pytorch.ipynb
2022-04-26 16:52:42 +02:00

9.1 KiB

import torch
import jovian
import torchvision
import matplotlib
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
import random
import os
import sys
# Load the dataset from "understat.csv" (path is relative to the working directory).
dataframe = pd.read_csv("understat.csv")

# Features are chosen by column position (indices 4 through 10 of the CSV);
# the regression target is the single 'position' column.
# NOTE(review): positional slicing silently picks different features if the
# CSV column order ever changes -- confirm the layout before reusing.
input_cols=list(dataframe.columns)[4:11]
output_cols = ['position']
# Bare expression: when run as a notebook cell this displays both lists.
input_cols, output_cols
(['matches', 'wins', 'draws', 'loses', 'scored', 'missed', 'pts'],
 ['position'])
def dataframe_to_arrays(dataframe):
    """Return the feature and target columns of *dataframe* as NumPy arrays.

    The columns are taken from the module-level ``input_cols`` and
    ``output_cols`` lists. The extraction works on a deep copy of the frame,
    so the caller's DataFrame is left untouched.

    Returns:
        A ``(inputs_array, targets_array)`` tuple.
    """
    working_copy = dataframe.copy(deep=True)
    features, labels = (
        working_copy[columns].to_numpy()
        for columns in (input_cols, output_cols)
    )
    return features, labels

# Extract the selected feature / target columns as NumPy arrays.
inputs_array, targets_array = dataframe_to_arrays(dataframe)

# Cast both tensors to torch.float so they match the dtype of the model's
# nn.Linear parameters and of each other inside the loss.
inputs = torch.from_numpy(inputs_array).type(torch.float)
targets = torch.from_numpy(targets_array).type(torch.float)

dataset = TensorDataset(inputs, targets)

# Derive the split sizes from the dataset length instead of hard-coding them:
# random_split rejects integer lengths that do not sum to len(dataset), so the
# former literal [548, 136] only worked for exactly 684 rows. A 20% validation
# share reproduces that same 548/136 split for 684 rows.
val_fraction = 0.2
val_size = int(len(dataset) * val_fraction)
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])

batch_size=50
# Only the training batches are reshuffled every epoch; validation order is fixed.
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
class Model_xPosition(nn.Module):
    """Single linear layer trained with L1 (mean absolute error) loss.

    Besides ``forward`` the class carries the per-batch and per-epoch hooks
    used by the training loop: loss computation for training and validation,
    aggregation of validation losses, and periodic progress printing.
    """

    def __init__(self, n_inputs=None, n_outputs=None):
        """Create the linear layer.

        Args:
            n_inputs: Number of input features. When omitted, the
                module-level ``input_size`` is used.
            n_outputs: Number of output values. When omitted, the
                module-level ``output_size`` is used.
        """
        super().__init__()
        # Fall back to the module-level sizes so the argument-less
        # constructor call keeps working.
        if n_inputs is None:
            n_inputs = input_size
        if n_outputs is None:
            n_outputs = output_size
        self.linear = nn.Linear(n_inputs, n_outputs)

    def forward(self, xb):
        """Apply the linear layer to the batch ``xb``."""
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        """Return the L1 loss of one ``(inputs, targets)`` batch, with gradients."""
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        loss = F.l1_loss(out, targets)
        return loss

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one ``(inputs, targets)`` batch.

        The loss is computed without gradient tracking, since it is only
        reported and never back-propagated.
        """
        inputs, targets = batch
        with torch.no_grad():
            out = self(inputs)
            loss = F.l1_loss(out, targets)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses into one Python float.

        Args:
            outputs: List of dicts as returned by ``validation_step``.

        Returns:
            ``{'val_loss': mean_of_batch_losses}``. Each batch counts equally,
            regardless of how many samples it contained.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss every 100th epoch and on the last epoch.

        Args:
            epoch: Zero-based epoch index.
            result: Dict containing a ``'val_loss'`` entry.
            num_epochs: Total number of epochs in the run.
        """
        if (epoch+1) % 100 == 0 or epoch == num_epochs-1:
            print("Epoch {} loss: {:.4f}".format(epoch+1, result['val_loss']))
            
            
def evaluate(model, val_loader):
    """Run the model's validation hooks over every batch of ``val_loader``.

    Args:
        model: Object exposing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the collected
        per-batch results.
    """
    # Validation never back-propagates, so skip autograd bookkeeping here.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and collect validation results.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer.
        model: Module exposing ``training_step`` and ``epoch_end`` plus the
            validation hooks used by ``evaluate``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        opt_func: Optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        List with one ``evaluate`` result per epoch, in epoch order.
    """
    optim = opt_func(model.parameters(), lr)

    def run_epoch(epoch_idx):
        # One optimisation step per mini-batch; gradients are cleared right
        # after each step so the next batch starts from zero.
        for minibatch in train_loader:
            batch_loss = model.training_step(minibatch)
            batch_loss.backward()
            optim.step()
            optim.zero_grad()
        metrics = evaluate(model, val_loader)
        model.epoch_end(epoch_idx, metrics, epochs)
        return metrics

    return [run_epoch(epoch_idx) for epoch_idx in range(epochs)]
# Layer dimensions follow the number of selected feature and target columns.
input_size = len(input_cols)
output_size = len(output_cols)
# The argument-less constructor reads the module-level input_size/output_size,
# so both must be defined before this line runs.
model=Model_xPosition()
epochs = 2000
lr = 1e-5
# fit() returns one validation-result entry per epoch; progress is printed
# by the model's epoch_end hook during training.
learning_proccess = fit(epochs, lr, model, train_loader, val_loader)
Epoch 100 loss: 6.2637
Epoch 200 loss: 2.9712
Epoch 300 loss: 1.9724
Epoch 400 loss: 1.9376
Epoch 500 loss: 1.9199
Epoch 600 loss: 1.9033
Epoch 700 loss: 1.8863
Epoch 800 loss: 1.8703
Epoch 900 loss: 1.8552
Epoch 1000 loss: 1.8405
Epoch 1100 loss: 1.8267
Epoch 1200 loss: 1.8134
Epoch 1300 loss: 1.8010
Epoch 1400 loss: 1.7876
Epoch 1500 loss: 1.7748
Epoch 1600 loss: 1.7626
Epoch 1700 loss: 1.7497
Epoch 1800 loss: 1.7387
Epoch 1900 loss: 1.7270
Epoch 2000 loss: 1.7162
def predict_single(input, target, model):
    """Format the model's prediction for one sample next to its target.

    Args:
        input: Tensor holding a single sample; a leading batch axis is added
            before it is passed to ``model``.
        target: Expected value, rendered with ``str()``.
        model: Callable applied to the one-sample batch.

    Returns:
        A newline-terminated line of the form
        ``"Target: <target>          Predicted: <prediction>"``.
    """
    single_batch = input.unsqueeze(0)
    prediction = model(single_batch)[0].detach()
    # Explicit !s keeps str() rendering for every tensor shape.
    return "Target: {!s}          Predicted: {!s}\n".format(target, prediction)
# Show predictions for 10 validation samples drawn at random without replacement.
sample_indices = random.sample(range(len(val_ds)), 10)
for i in sample_indices:
    input_, target = val_ds[i]
    # predict_single already ends each line with "\n", hence end="".
    print(predict_single(input_, target, model), end="")
Target: tensor([16.])          Predicted: tensor([13.5861])
Target: tensor([14.])          Predicted: tensor([10.1553])
Target: tensor([19.])          Predicted: tensor([16.5709])
Target: tensor([18.])          Predicted: tensor([18.5809])
Target: tensor([2.])          Predicted: tensor([2.5676])
Target: tensor([14.])          Predicted: tensor([13.4065])
Target: tensor([11.])          Predicted: tensor([11.6196])
Target: tensor([13.])          Predicted: tensor([13.1022])
Target: tensor([17.])          Predicted: tensor([14.5672])
Target: tensor([1.])          Predicted: tensor([-1.9346])
# Write one "Target / Predicted" line per validation sample to result.txt,
# replacing any previous contents of the file.
with open("result.txt", "w+") as file:
    for i in range(len(val_ds)):
        input_, target = val_ds[i]
        # predict_single returns an already newline-terminated string.
        file.write(predict_single(input_, target, model))
!jupyter nbconvert --to script ml_pytorch.ipynb
[NbConvertApp] Converting notebook ml_pytorch.ipynb to script
[NbConvertApp] Writing 3828 bytes to ml_pytorch.py