ium_478839/ml_pytorch_mlflow.py

import torch
import jovian
import torchvision
import matplotlib
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
import random
import os
import sys
import mlflow
from mlflow.models import infer_signature
from urllib.parse import urlparse
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
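# All runs from this script are logged under the "s478839" MLflow experiment.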
mlflow.set_experiment("s478839")
# Leftover configuration stub; it is never called in this script
# (the effective epoch count is set below, before my_main).
def my_config():
    epochs = 1000
# Load data
dataframe = pd.read_csv("understat.csv")

# Choose feature and target columns
input_cols = list(dataframe.columns)[4:11]
output_cols = ['position']
input_cols, output_cols  # notebook leftover: only displays the columns in Jupyter, no effect here
def dataframe_to_arrays(dataframe):
    # Convert the selected feature and target columns to NumPy arrays
    dataframe_loc = dataframe.copy(deep=True)
    inputs_array = dataframe_loc[input_cols].to_numpy()
    targets_array = dataframe_loc[output_cols].to_numpy()
    return inputs_array, targets_array
inputs_array, targets_array = dataframe_to_arrays(dataframe)
inputs = torch.from_numpy(inputs_array).type(torch.float)
targets = torch.from_numpy(targets_array).type(torch.float)
dataset = TensorDataset(inputs, targets)
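# random_split requires the given lengths to sum to len(dataset): 548 + 136 = 684 rows here.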
train_ds, val_ds = random_split(dataset, [548, 136])
batch_size = 50
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
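# Single linear layer mapping the selected stats to the predicted league position.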
class Model_xPosition(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss (mean absolute error)
        loss = F.l1_loss(out, targets)
        return loss

    def validation_step(self, batch):
        inputs, targets = batch
        out = self(inputs)
        loss = F.l1_loss(out, targets)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        if (epoch + 1) % 100 == 0 or epoch == num_epochs - 1:
            print("Epoch {} loss: {:.4f}".format(epoch + 1, result['val_loss']))
def evaluate(model, val_loader):
    outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validate once per epoch
        result = evaluate(model, val_loader)
        model.epoch_end(epoch, result, epochs)
        history.append(result)
    return history
def predict_single(input, target, model):
    inputs = input.unsqueeze(0)
    predictions = model(inputs)
    prediction = predictions[0].detach()
    return "Target: " + str(target) + " Predicted: " + str(prediction) + "\n"


def prediction(input, model):
    inputs = input.unsqueeze(0)
    predictions = model(inputs)
    predicted = predictions[0].detach()
    return predicted
input_size = len(input_cols)
output_size = len(output_cols)
model = Model_xPosition()
lr = 1e-5
# epochs = int(sys.argv[1]) if len(sys.argv) > 1 else 20
epochs = 1000
def my_main(epochs):
    mlflow.log_param("epochs", epochs)
    learning_process = fit(epochs, lr, model, train_loader, val_loader)
    # Show predictions for a few random validation samples
    for i in random.sample(range(0, len(val_ds)), 10):
        input_, target = val_ds[i]
        print(predict_single(input_, target, model), end="")
    expected = []
    predicted = []
    inputss = []
    targetss = []
    for i in range(0, len(val_ds), 1):
        input_, target = val_ds[i]
        expected.append(float(target))
        predicted.append(float(prediction(input_, model)))
        inputss.append(input_)
    # inputss, targetss = val_ds
    inputsss = pd.DataFrame(inputss, dtype=np.float64)
    # inputss = inputss.to_numpy()
    # inputsss = inputss.values
    MSE = mean_squared_error(expected, predicted)
    MAE = mean_absolute_error(expected, predicted)
    mlflow.log_metric("MSE", MSE)
    mlflow.log_metric("MAE", MAE)
    with open("result.txt", "w+") as file:
        for i in range(0, len(val_ds), 1):
            input_, target = val_ds[i]
            file.write(str(predict_single(input_, target, model)))
    print(inputsss)
    input_example = inputsss.iloc[:1]  # first validation row used as the logged input example
    signature = infer_signature(inputsss, expected)
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    if tracking_url_type_store != "file":
        mlflow.pytorch.log_model(model, "model", registered_model_name="s478839",
                                 signature=signature, input_example=input_example)
    else:
        mlflow.pytorch.log_model(model, "model", signature=signature, input_example=input_example)
    mlflow.pytorch.save_model(model, "my_model", signature=signature, input_example=input_example)
with mlflow.start_run() as run:
    my_main(epochs)
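# Example usage (assuming understat.csv is in the working directory and MLflow is installed):
#   python ml_pytorch_mlflow.py
# The logged params, metrics and model can then be inspected with `mlflow ui`.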