# Source: ium_434732/IUM_08_mlflow.py
# Snapshot: 2021-05-16 15:24:58 +02:00 (79 lines, 2.7 KiB, Python)

import torch
import sys
from torch import nn
import numpy as np
import pandas as pd
import mlflow
from sklearn.metrics import accuracy_score
# Configure NumPy array printing: suppress=False lets NumPy fall back to
# scientific notation for very small / widely ranging floats.
np.set_printoptions(suppress=False)
class LogisticRegressionModel(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    The attribute names ``linear`` and ``sigmoid`` are part of the saved
    ``state_dict`` layout / public surface and must not be renamed.
    """

    def __init__(self, input_dim, output_dim):
        """Build the model.

        Args:
            input_dim: Number of input features per sample.
            output_dim: Number of output units (sigmoid activations) per sample.
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for a batch ``x`` of shape ``(n, input_dim)``."""
        return self.sigmoid(self.linear(x))
# Feature columns fed to the model, in the order the checkpoint was trained on.
_FEATURE_COLUMNS = [
    'age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
    'ejection_fraction', 'high_blood_pressure', 'platelets',
    'serum_creatinine', 'serum_sodium', 'sex', 'smoking',
]
# Binary target column.
_TARGET_COLUMN = 'DEATH_EVENT'


def _load_split(csv_path):
    """Read one CSV split and return ``(features, labels)`` as float32 tensors."""
    frame = pd.read_csv(csv_path)
    features = frame[_FEATURE_COLUMNS].astype(np.float32)
    labels = frame[_TARGET_COLUMN].astype(np.float32)
    return torch.from_numpy(features.values), torch.from_numpy(labels.values)


def readAndtrain(epochs, batch_size):
    """Continue training the saved model and predict on the test split.

    Reads ``train.csv`` and ``test.csv`` from the working directory, loads the
    weights stored in ``DEATH_EVENT.pth``, runs ``epochs`` full-batch SGD steps
    with binary cross-entropy loss, writes the updated weights back to
    ``DEATH_EVENT.pth`` and evaluates the model on the test features.

    Args:
        epochs: Number of full-batch gradient steps. With ``0`` no update is
            made and the returned loss is the current training loss.
        batch_size: Accepted for interface compatibility; the training loop is
            full-batch, so this value does not influence optimisation.

    Returns:
        A tuple ``(prediction, loss, yTest)`` where ``prediction`` is the
        ``(n_test, 1)`` tensor of sigmoid outputs, ``loss`` is the training
        loss of the last step as a Python float, and ``yTest`` is the
        ``(n_test,)`` float32 tensor of test labels.
    """
    xTrain, train_labels = _load_split("train.csv")
    xTest, yTest = _load_split("test.csv")
    # BCELoss needs targets shaped like the (n, 1) model output. Using -1 lets
    # the row count follow the data instead of hard-coding 179 samples.
    yTrain = train_labels.reshape(-1, 1)

    learning_rate = 0.002
    input_dim = len(_FEATURE_COLUMNS)
    output_dim = 1

    model = LogisticRegressionModel(input_dim, output_dim)
    # Training resumes from the previously saved checkpoint.
    model.load_state_dict(torch.load('DEATH_EVENT.pth'))
    criterion = torch.nn.BCELoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    loss = None
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        loss = criterion(model(xTrain), yTrain)
        loss.backward()
        optimizer.step()

    torch.save(model.state_dict(), 'DEATH_EVENT.pth')

    # Inference only: no autograd graph is needed for the returned tensors.
    model.eval()
    with torch.no_grad():
        if loss is None:
            # No training step ran (epochs == 0); report the current loss
            # rather than failing on an unbound variable.
            last_loss = criterion(model(xTrain), yTrain).item()
        else:
            last_loss = loss.item()
        prediction = model(xTest)

    return prediction, last_loss, yTest
# Command-line usage: <script> [batch_size] [epochs]; defaults are 10 and 5.
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10
epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5

with mlflow.start_run():
    prediction, loss, yTest = readAndtrain(epochs, batch_size)

    # The model emits a single sigmoid probability per row (shape (n, 1)), so
    # argmax over axis 1 would always yield class 0. Threshold the probability
    # at 0.5 to obtain the predicted binary label instead.
    probabilities = prediction.detach().numpy().reshape(-1)
    predicted_labels = (probabilities > 0.5).astype(int)
    true_labels = yTest.detach().numpy().astype(int)

    mlflow.log_metric("accuracy_score", accuracy_score(true_labels, predicted_labels))
    mlflow.log_param("Last loss", loss)
    mlflow.log_param("epochs", epochs)
    mlflow.log_param("batch size", batch_size)