"""Train a single-layer linear regressor on ``understat.csv`` and track it with MLflow.

The script reads the CSV, uses columns 4..10 as features and ``position`` as
the target, trains ``Model_xPosition`` with an L1 loss, logs MSE/MAE on the
validation split to MLflow, writes per-sample predictions to ``result.txt``
and stores the trained model.
"""
import torch
import jovian
import torchvision
import matplotlib
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
import random
import os
import sys
import mlflow
from mlflow.models import infer_signature
from urllib.parse import urlparse
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

mlflow.set_experiment("s478839")


def my_config():
    """Bind a local ``epochs`` value and return ``None``.

    NOTE(review): nothing in this file calls this function; it looks like a
    leftover configuration hook - confirm before removing.
    """
    epochs = 1000


# load data
dataframe = pd.read_csv("understat.csv")

# choose columns: CSV columns 4..10 are the features, 'position' is the target
input_cols = list(dataframe.columns)[4:11]
output_cols = ['position']


def dataframe_to_arrays(dataframe):
    """Split ``dataframe`` into feature and target NumPy arrays.

    Uses the module-level ``input_cols`` / ``output_cols`` column lists.

    Args:
        dataframe: pandas DataFrame containing every column named in
            ``input_cols`` and ``output_cols``.

    Returns:
        Tuple ``(inputs_array, targets_array)`` of 2-D NumPy arrays.
    """
    # Work on a deep copy so the caller's frame is never touched.
    dataframe_loc = dataframe.copy(deep=True)
    inputs_array = dataframe_loc[input_cols].to_numpy()
    targets_array = dataframe_loc[output_cols].to_numpy()
    return inputs_array, targets_array


inputs_array, targets_array = dataframe_to_arrays(dataframe)

inputs = torch.from_numpy(inputs_array).type(torch.float)
targets = torch.from_numpy(targets_array).type(torch.float)

dataset = TensorDataset(inputs, targets)

# random_split requires the lengths to sum to len(dataset) (548 + 136 = 684).
train_ds, val_ds = random_split(dataset, [548, 136])
batch_size = 50
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)


class Model_xPosition(nn.Module):
    """Single ``nn.Linear`` layer mapping ``input_size`` features to ``output_size`` outputs.

    ``input_size`` and ``output_size`` are read from module globals at
    construction time, so they must be defined before instantiating.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Apply the linear layer to the batch ``xb``."""
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        """Return the L1 loss of the model on one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        loss = F.l1_loss(out, targets)
        return loss

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one batch, detached from the graph."""
        inputs, targets = batch
        out = self(inputs)
        loss = F.l1_loss(out, targets)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average per-batch ``val_loss`` tensors into ``{'val_loss': float}``."""
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss every 100th epoch and on the final epoch."""
        if (epoch+1) % 100 == 0 or epoch == num_epochs-1:
            print("Epoch {} loss: {:.4f}".format(epoch+1, result['val_loss']))


def evaluate(model, val_loader):
    """Run ``model`` over ``val_loader`` and return ``{'val_loss': float}``."""
    # Gradients are not needed for validation; skip building the graph.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)


def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` and return the per-epoch validation results.

    Args:
        epochs: number of passes over ``train_loader``.
        lr: learning rate handed to the optimizer.
        model: object exposing ``training_step``, ``validation_step``,
            ``validation_epoch_end``, ``epoch_end`` and ``parameters``.
        train_loader: iterable of training batches.
        val_loader: iterable of validation batches.
        opt_func: optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        List with one ``{'val_loss': float}`` dict per epoch.
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training phase
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result, epochs)
        history.append(result)
    return history


def predict_single(input, target, model):
    """Return a ``"Target: ... Predicted: ...\\n"`` line for one sample."""
    inputs = input.unsqueeze(0)  # add a batch dimension of size 1
    predictions = model(inputs)
    predicted = predictions[0].detach()
    return "Target: "+str(target)+" Predicted: "+str(predicted)+"\n"


def prediction(input, model):
    """Return the detached model output for a single sample."""
    inputs = input.unsqueeze(0)  # add a batch dimension of size 1
    predictions = model(inputs)
    predicted = predictions[0].detach()
    return predicted


input_size = len(input_cols)
output_size = len(output_cols)
model = Model_xPosition()

lr = 1e-5
# epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 20
epochs = 1000


def my_main(epochs):
    """Train the global ``model``, log metrics to MLflow and persist the model.

    Side effects: logs the ``epochs`` param and ``MSE`` / ``MAE`` metrics to
    the active MLflow run, prints sample predictions and the validation
    inputs, writes ``result.txt`` and logs/saves the model through
    ``mlflow.pytorch``.

    Args:
        epochs: number of training epochs.
    """
    mlflow.log_param("epochs", epochs)
    fit(epochs, lr, model, train_loader, val_loader)

    # Show a few random validation predictions (at most 10, never more than
    # the validation set holds).
    for i in random.sample(range(0, len(val_ds)), min(10, len(val_ds))):
        input_, target = val_ds[i]
        print(predict_single(input_, target, model), end="")

    expected = []
    predicted = []
    val_inputs = []
    for i in range(len(val_ds)):
        input_, target = val_ds[i]
        expected.append(float(target))
        predicted.append(float(prediction(input_, model)))
        # list.append returns None, so its result must not be re-assigned.
        val_inputs.append(input_.numpy())
    val_inputs = np.asarray(val_inputs, dtype=np.float64)

    MSE = mean_squared_error(expected, predicted)
    MAE = mean_absolute_error(expected, predicted)
    mlflow.log_metric("MSE", MSE)
    mlflow.log_metric("MAE", MAE)

    with open("result.txt", "w+") as file:
        for i in range(len(val_ds)):
            input_, target = val_ds[i]
            file.write(str(predict_single(input_, target, model)))

    print(val_inputs)
    input_example = val_inputs[0]
    # Pass ndarrays on both sides: ndarray is a documented infer_signature input.
    signature = infer_signature(val_inputs, np.asarray(expected))

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    if tracking_url_type_store != "file":
        # The registered_model_name argument is only passed when the tracking
        # URI is not a local file store.
        mlflow.pytorch.log_model(model, "model", registered_model_name="s478839", signature=signature, input_example=input_example)
    else:
        mlflow.pytorch.log_model(model, "model", signature=signature, input_example=input_example)
        # NOTE(review): the original formatting was lost; save_model is
        # assumed to belong to this branch - confirm.
        mlflow.pytorch.save_model(model, "my_model", signature=signature, input_example=input_example)


with mlflow.start_run() as run:
    my_main(epochs)