74 lines
2.3 KiB
Python
74 lines
2.3 KiB
Python
|
import torch
|
||
|
import sys
|
||
|
import mlflow
|
||
|
import torch.nn.functional as F
|
||
|
from torch import nn
|
||
|
from sklearn.model_selection import train_test_split
|
||
|
from sklearn.metrics import accuracy_score, mean_squared_error
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
np.set_printoptions(suppress=False)
|
||
|
|
||
|
|
||
|
class LogisticRegressionModel(nn.Module):
    """Logistic regression: one affine layer followed by an element-wise sigmoid."""

    def __init__(self, input_dim, output_dim):
        """Create the layers.

        Args:
            input_dim: Size of each input sample (passed to ``nn.Linear``).
            output_dim: Size of each output sample (passed to ``nn.Linear``).
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        logits = self.linear(x)
        probabilities = self.sigmoid(logits)
        return probabilities
|
||
|
|
||
|
with mlflow.start_run():
    # Train a logistic-regression classifier for the 'stroke' column on the
    # pre-split CSV files, save the weights, and record the run in MLflow.
    # Optional CLI arguments: argv[1] = batch size, argv[2] = number of epochs.
    data_train = pd.read_csv("data_train.csv")
    data_test = pd.read_csv("data_test.csv")
    # NOTE(review): the validation split is loaded but not used anywhere below.
    data_val = pd.read_csv("data_val.csv")
    FEATURES = ['age', 'hypertension', 'heart_disease', 'ever_married', 'avg_glucose_level', 'bmi']

    x_train = data_train[FEATURES].astype(np.float32)
    y_train = data_train['stroke'].astype(np.float32)

    x_test = data_test[FEATURES].astype(np.float32)
    y_test = data_test['stroke'].astype(np.float32)

    fTrain = torch.from_numpy(x_train.values)
    # Column vector so the targets match the model's (N, 1) output for BCELoss.
    # -1 lets NumPy infer the row count instead of hard-coding the dataset size.
    tTrain = torch.from_numpy(y_train.values.reshape(-1, 1))

    fTest = torch.from_numpy(x_test.values)
    tTest = torch.from_numpy(y_test.values)

    # NOTE(review): batch_size is only logged; training below uses the full
    # training set in a single batch per epoch.
    batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 16
    num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
    learning_rate = 0.001
    # Derived from the feature list so the two cannot drift apart.
    input_dim = len(FEATURES)
    output_dim = 1

    model = LogisticRegressionModel(input_dim, output_dim)

    criterion = torch.nn.BCELoss(reduction='mean')
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    # Stays None when num_epochs is 0 so the logging step below cannot hit an
    # unbound name.
    loss = None
    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()
        # Forward pass
        y_pred = model(fTrain)
        # Compute loss
        loss = criterion(y_pred, tTrain)
        # Backward pass
        loss.backward()
        optimizer.step()

    # Evaluate on the test split in inference mode, without tracking gradients.
    model.eval()
    with torch.no_grad():
        y_pred = model(fTest)

    torch.save(model.state_dict(), 'stroke.pth')

    # mean_squared_error returns the MSE; take the square root so the value
    # logged under "rmse" really is the root-mean-squared error.
    mse = mean_squared_error(tTest.numpy(), y_pred.numpy().ravel())
    rmse = float(np.sqrt(mse))
    mlflow.log_metric("rmse", rmse)
    if loss is not None:
        mlflow.log_param("Last loss", loss.item())
    mlflow.log_param("epochs", num_epochs)
    mlflow.log_param("batch size", batch_size)
|
||
|
|