2022-05-15 12:30:41 +02:00
|
|
|
import torch
|
|
|
|
import jovian
|
|
|
|
import torchvision
|
|
|
|
import matplotlib
|
|
|
|
import torch.nn as nn
|
|
|
|
import pandas as pd
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
import seaborn as sns
|
|
|
|
import torch.nn.functional as F
|
|
|
|
from torchvision.datasets.utils import download_url
|
|
|
|
from torch.utils.data import DataLoader, TensorDataset, random_split
|
|
|
|
import random
|
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import mlflow
|
|
|
|
from mlflow.models import infer_signature
|
|
|
|
from urllib.parse import urlparse
|
2022-05-15 13:24:18 +02:00
|
|
|
from sklearn.metrics import mean_squared_error
|
|
|
|
from sklearn.metrics import mean_absolute_error
|
2022-05-15 12:30:41 +02:00
|
|
|
|
|
|
|
# Make "s478839" the active MLflow experiment for the run started at the end of this script.
mlflow.set_experiment("s478839")
|
|
|
|
|
|
|
|
def my_config():
    """Hold the default training configuration.

    The function only binds a local ``epochs`` value; nothing is returned or
    stored, so calling it has no observable effect.
    """
    epochs = 1000
    return None
|
|
|
|
|
|
|
|
# Load the data set from the CSV file in the current working directory.
dataframe = pd.read_csv("understat.csv")

# Choose the model columns: the features are the columns at positions 4..10,
# the regression target is the 'position' column.
input_cols = dataframe.columns[4:11].tolist()
output_cols = ['position']
|
|
|
|
|
|
|
|
def dataframe_to_arrays(dataframe, input_columns=None, output_columns=None):
    """Split a data frame into NumPy arrays of model inputs and targets.

    Args:
        dataframe: pandas DataFrame holding both the feature and the target
            columns.
        input_columns: List of feature column labels. Defaults to the
            module-level ``input_cols``.
        output_columns: List of target column labels. Defaults to the
            module-level ``output_cols``.

    Returns:
        Tuple ``(inputs_array, targets_array)``. Each is a 2-D array with one
        row per data-frame row and one column per selected label.

    Raises:
        KeyError: If a requested column is missing from ``dataframe``.
    """
    # Resolve the defaults at call time so the module-level column lists can
    # be defined (or changed) after this function.
    if input_columns is None:
        input_columns = input_cols
    if output_columns is None:
        output_columns = output_cols

    # copy=True keeps the returned arrays independent of the caller's frame
    # without deep-copying every column of it first.
    inputs_array = dataframe[input_columns].to_numpy(copy=True)
    targets_array = dataframe[output_columns].to_numpy(copy=True)
    return inputs_array, targets_array
|
|
|
|
|
|
|
|
# Convert the selected columns into float32 tensors.
inputs_array, targets_array = dataframe_to_arrays(dataframe)
inputs = torch.from_numpy(inputs_array).type(torch.float)
targets = torch.from_numpy(targets_array).type(torch.float)

dataset = TensorDataset(inputs, targets)

# Hold out one fifth of the rows (rounded down) for validation. Deriving the
# sizes from the data keeps random_split working for any row count; for a
# 684-row data set this is exactly the previously hard-coded 548/136 split.
val_size = len(dataset) // 5
train_ds, val_ds = random_split(dataset, [len(dataset) - val_size, val_size])

batch_size = 50
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
|
|
|
|
|
|
|
|
class Model_xPosition(nn.Module):
    """Single linear layer that regresses the target column(s) from the inputs.

    Besides ``forward`` it bundles the per-batch and per-epoch helpers used by
    the training and evaluation loops. Every loss is the mean absolute error
    (``F.l1_loss``).
    """

    def __init__(self, in_features=None, out_features=None):
        """Create the linear layer.

        Args:
            in_features: Number of input features. Defaults to the
                module-level ``input_size``.
            out_features: Number of predicted values. Defaults to the
                module-level ``output_size``.
        """
        super().__init__()
        # Defaults are resolved at call time, so the module-level sizes only
        # need to exist when the model is instantiated without arguments.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = output_size
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, xb):
        """Apply the linear layer to the batch ``xb``."""
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        """Return the L1 loss of one ``(inputs, targets)`` batch, with its graph attached."""
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        loss = F.l1_loss(out, targets)
        return loss

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one batch, detached from the graph."""
        inputs, targets = batch
        out = self(inputs)
        loss = F.l1_loss(out, targets)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch losses from ``validation_step``.

        Args:
            outputs: Non-empty list of ``{'val_loss': tensor}`` dicts.

        Returns:
            ``{'val_loss': float}`` with the unweighted mean over batches.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss on every 100th epoch and on the last one.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            result: Dict holding that epoch's ``'val_loss'``.
            num_epochs: Total number of epochs in the run.
        """
        if (epoch + 1) % 100 == 0 or epoch == num_epochs - 1:
            print("Epoch {} loss: {:.4f}".format(epoch + 1, result['val_loss']))
|
|
|
|
|
|
|
|
|
|
|
|
def evaluate(model, val_loader):
    """Run the model over a validation loader and aggregate the results.

    Args:
        model: Object providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the list of
        per-batch ``validation_step`` results.
    """
    # Validation never back-propagates, so skip autograd bookkeeping while
    # computing the per-batch results.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
|
|
|
|
|
|
|
|
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` and collect its validation result after every epoch.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer.
        model: Module providing ``training_step`` and ``epoch_end`` (plus the
            validation hooks that ``evaluate`` calls).
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        opt_func: Optimizer factory called as ``opt_func(parameters, lr)``.

    Returns:
        List with one ``evaluate`` result per epoch, in epoch order.
    """
    optimizer = opt_func(model.parameters(), lr)
    val_history = []
    for epoch in range(epochs):
        # Training phase: one optimizer update per batch.
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase: score, report and record this epoch.
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch, epoch_result, epochs)
        val_history.append(epoch_result)
    return val_history
|
|
|
|
|
|
|
|
def predict_single(input, target, model):
    """Format the target and the model's prediction for one sample as a line of text.

    Args:
        input: Tensor holding a single sample, without a batch dimension.
        target: Expected value for the sample; only used in the text.
        model: Callable applied to the one-sample batch.

    Returns:
        ``"Target: <target> Predicted: <prediction>\\n"``.
    """
    # The model works on batches, so wrap the sample in a batch of one and
    # take the first (only) row of the output.
    batch = input.unsqueeze(0)
    predicted = model(batch)[0].detach()
    # !s forces str(), exactly like the explicit str() concatenation would.
    return f"Target: {target!s} Predicted: {predicted!s}\n"
|
|
|
|
|
2022-05-15 14:19:02 +02:00
|
|
|
def prediction(input, model):
    """Return the model's detached prediction for a single sample.

    Args:
        input: Tensor holding one sample, without a batch dimension.
        model: Callable applied to the one-sample batch.

    Returns:
        The first row of the model output, detached from the autograd graph.
    """
    batch = input.unsqueeze(0)
    return model(batch)[0].detach()
|
|
|
|
|
|
|
|
# Layer sizes follow from the selected feature and target columns.
input_size = len(input_cols)
output_size = len(output_cols)
model = Model_xPosition()

# Training hyper-parameters. The epoch count is fixed; reading it from the
# command line is currently disabled:
# epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 20
lr = 1e-5
epochs = 1000
|
|
|
|
|
|
|
|
def my_main(epochs):
    """Train the module-level model, evaluate it and log the outcome to MLflow.

    Steps, in order:
      1. Log ``epochs`` as an MLflow parameter and train ``model`` with ``fit``.
      2. Print target/prediction pairs for up to ten random validation samples.
      3. Compute MSE and MAE over the whole validation set and log both.
      4. Write one target/prediction line per validation sample to
         ``result.txt``.
      5. Log the model to MLflow with a signature and an input example.

    Args:
        epochs: Number of training epochs.
    """
    mlflow.log_param("epochs", epochs)
    fit(epochs, lr, model, train_loader, val_loader)

    # Spot-check a few random validation samples on stdout; never ask for
    # more samples than the validation set holds.
    sample_count = min(10, len(val_ds))
    for i in random.sample(range(len(val_ds)), sample_count):
        input_, target = val_ds[i]
        print(predict_single(input_, target, model), end="")

    # Collect plain floats for the scikit-learn metric functions.
    expected = []
    predicted = []
    for i in range(len(val_ds)):
        input_, target = val_ds[i]
        expected.append(float(target))
        predicted.append(float(prediction(input_, model)))

    mse = mean_squared_error(expected, predicted)
    mae = mean_absolute_error(expected, predicted)
    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("MAE", mae)

    with open("result.txt", "w+") as result_file:
        for i in range(len(val_ds)):
            input_, target = val_ds[i]
            result_file.write(str(predict_single(input_, target, model)))

    # The signature and the input example must describe what the model is
    # actually fed: a batch of feature rows. Use a one-row NumPy batch built
    # from the first validation sample (features only, not the target).
    example_features, _ = val_ds[0]
    input_example = example_features.unsqueeze(0).numpy()
    example_output = prediction(example_features, model).unsqueeze(0).numpy()
    signature = infer_signature(input_example, example_output)

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

    if tracking_url_type_store != "file":
        # Passing registered_model_name only here follows the original code;
        # the file-based branch below logs without a registry name.
        mlflow.pytorch.log_model(
            model,
            "model",
            registered_model_name="s478839",
            signature=signature,
            input_example=input_example,
        )
    else:
        mlflow.pytorch.log_model(
            model, "model", signature=signature, input_example=input_example
        )
        # NOTE(review): the local export is assumed to belong to this branch
        # (it directly followed the log_model call above) - confirm.
        mlflow.pytorch.save_model(
            model, "my_model", signature=signature, input_example=input_example
        )
|
|
|
|
|
2022-05-15 12:30:41 +02:00
|
|
|
|
|
|
|
# Execute the whole experiment inside one MLflow run, so everything my_main
# logs is recorded against that run.
with mlflow.start_run() as run:
    my_main(epochs)
|
|
|
|
|