ium_434766/stroke-pytorch.py

72 lines
2.1 KiB
Python
Raw Normal View History

2021-04-17 13:35:20 +02:00
import torch
2021-05-07 21:30:35 +02:00
import sys
2021-04-17 13:35:20 +02:00
import torch.nn.functional as F
from torch import nn
from torch.autograd import Variable
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
2021-05-11 21:53:49 +02:00
from sacred import Experiment
from sacred.observers import FileStorageObserver
2021-05-07 21:30:35 +02:00
np.set_printoptions(suppress=False)
2021-05-13 23:06:29 +02:00
2021-04-17 13:35:20 +02:00
class LogisticRegressionModel(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, input_dim, output_dim):
        """Create the linear layer and the sigmoid activation.

        Args:
            input_dim: Size of the last dimension of the input tensor.
            output_dim: Size of the last dimension of the output tensor.
        """
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))``, i.e. values in the open interval (0, 1)."""
        out = self.linear(x)
        return self.sigmoid(out)
2021-05-13 23:06:29 +02:00
2021-05-11 22:16:03 +02:00
# Load the pre-split dataset from the working directory. Each CSV must
# provide the FEATURES columns and the 'stroke' target column, since both
# are indexed below.
data_train = pd.read_csv("data_train.csv")
data_test = pd.read_csv("data_test.csv")
# NOTE: the validation split is loaded but not used anywhere in this script.
data_val = pd.read_csv("data_val.csv")
FEATURES = ['age', 'hypertension', 'heart_disease', 'ever_married', 'avg_glucose_level', 'bmi']

# Cast everything to float32 so the tensors match the dtype of nn.Linear's
# parameters and of the BCELoss target.
x_train = data_train[FEATURES].astype(np.float32)
y_train = data_train['stroke'].astype(np.float32)

x_test = data_test[FEATURES].astype(np.float32)
y_test = data_test['stroke'].astype(np.float32)

fTrain = torch.from_numpy(x_train.values)
# Column vector (N, 1) so the target lines up with the model output.
# -1 lets NumPy infer N instead of hard-coding the training-set row count,
# which would raise as soon as data_train.csv has a different length.
tTrain = torch.from_numpy(y_train.values.reshape(-1, 1))

fTest = torch.from_numpy(x_test.values)
tTest = torch.from_numpy(y_test.values)
2021-04-17 13:35:20 +02:00
2021-05-11 22:16:03 +02:00
# Command-line hyperparameters: argv[1] = batch size, argv[2] = number of
# epochs; both fall back to defaults when omitted.
# NOTE: batch_size is parsed but the training loop in this script runs on the
# whole training set at once, so the value currently has no effect.
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 16
num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
learning_rate = 0.001
# Derive the input width from the feature list so the model cannot drift out
# of sync with the columns selected for training.
input_dim = len(FEATURES)
output_dim = 1

model = LogisticRegressionModel(input_dim, output_dim)

# Binary cross-entropy on the sigmoid output, averaged over the batch.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
2021-04-17 13:35:20 +02:00
2021-05-11 22:16:03 +02:00
# Full-batch gradient descent: every epoch is a single optimizer step
# computed on the entire training set.
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(fTrain)
    # Compute loss against the (N, 1) training target
    loss = criterion(y_pred, tTrain)
    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

# Predict on the test features. Inference needs no autograd graph, so run it
# in eval mode under no_grad instead of stripping the graph with `.data`.
model.eval()
with torch.no_grad():
    y_pred = model(fTest)
print("predicted Y value: ", y_pred)

# Persist only the learned parameters (state_dict), not the module object.
torch.save(model.state_dict(), 'stroke.pth')
2021-04-17 13:35:20 +02:00