88 lines
2.6 KiB
Python
88 lines
2.6 KiB
Python
|
import sys
|
||
|
import torch
|
||
|
import torch.nn as nn
|
||
|
from sklearn import preprocessing
|
||
|
import numpy as np
|
||
|
import pandas as pd
|
||
|
np.set_printoptions(suppress=False)
|
||
|
|
||
|
|
||
|
class LogisticRegressionModel(nn.Module):
    """Logistic regression expressed as a torch module.

    The model is one affine layer (``nn.Linear``) whose output is squashed
    element-wise by a sigmoid, so every output value lies in (0, 1).

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of output units per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # Attribute names are part of the saved state_dict keys
        # ("linear.weight", "linear.bias"), so they must stay stable.
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
        return self.sigmoid(self.linear(x))
|
||
|
|
||
|
# Load both data splits from CSV files in the current working directory.
train, test = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))

# Names of the columns pandas stored with object dtype in the training
# frame; these are the ones converted to integer codes later on.
categorical_cols = train.select_dtypes(include=object).columns.to_numpy()

# Column roles are assigned purely by position in the training frame:
# the first column is skipped (presumably a row id -- verify against the
# CSV), the last column is the target, everything in between is a feature.
input_cols = train.columns.to_numpy()[1:-1]
output_cols = train.columns.to_numpy()[-1:]
|
||
|
|
||
|
|
||
|
def dataframe_to_arrays(dataframe):
    """Encode, min-max scale and split a dataframe into numpy arrays.

    The input frame is left untouched; all work happens on a deep copy.
    Every column listed in the module-level ``categorical_cols`` is replaced
    by its pandas category codes, then *all* columns are rescaled with a
    freshly fitted ``MinMaxScaler``.

    Args:
        dataframe: Frame containing at least the columns named in
            ``categorical_cols``, ``input_cols`` and ``output_cols``.

    Returns:
        A ``(inputs, targets)`` tuple of numpy arrays holding the scaled
        ``input_cols`` and ``output_cols`` columns respectively.
    """
    encoded = dataframe.copy(deep=True)

    # Replace non-numeric categorical columns with integer category codes.
    for name in categorical_cols:
        encoded[name] = encoded[name].astype('category').cat.codes

    # NOTE(review): both the category codes above and this scaler are
    # derived from whatever frame is passed in, so two different frames
    # (e.g. train and test) are encoded and scaled independently of each
    # other -- confirm that this is intended.
    scaler = preprocessing.MinMaxScaler()
    scaled = pd.DataFrame(scaler.fit_transform(encoded), columns=encoded.columns)

    # Extract inputs and outputs as numpy arrays.
    return scaled[input_cols].to_numpy(), scaled[output_cols].to_numpy()
|
||
|
|
||
|
# ---------------------------------------------------------------------------
# Training script: convert the prepared data to tensors, fit the logistic
# regression model with full-batch SGD, print predictions for the test
# split and save the learned weights to 'stroke.pth'.
# ---------------------------------------------------------------------------

# Encode/scale each split and separate features from targets.
inputs_array_training, targets_array_training = dataframe_to_arrays(train)
inputs_array_testing, targets_array_testing = dataframe_to_arrays(test)

# Cast to float32 so the data matches the dtype of the model parameters.
inputs_training = torch.from_numpy(inputs_array_training).type(torch.float32)
targets_training = torch.from_numpy(targets_array_training).type(torch.float32)

inputs_testing = torch.from_numpy(inputs_array_testing).type(torch.float32)
targets_testing = torch.from_numpy(targets_array_testing).type(torch.float32)

# These objects are already tensors. `.values` is a pandas idiom; on a
# torch.Tensor it is a method rather than the data, so accessing it without
# a call handed a bound method to the model. Use the tensors directly.
fTrain = inputs_training
tTrain = targets_training

fTest = inputs_testing
tTest = targets_testing

# Hyper-parameters.
batch_size = 16  # NOTE: not used below -- every epoch is one full-batch step.
num_epochs = 5
learning_rate = 0.001
# Take the feature count from the data instead of hard-coding it, so the
# linear layer always matches the number of selected input columns.
input_dim = fTrain.shape[1]
output_dim = 1

model = LogisticRegressionModel(input_dim, output_dim)

criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    # Forward pass over the whole training set.
    y_pred = model(fTrain)
    # Binary cross-entropy between predicted probabilities and targets.
    loss = criterion(y_pred, tTrain)
    # Backward pass and parameter update.
    loss.backward()
    optimizer.step()

# Inference: `forward` only accepts the input tensor (the former
# `return_dict=False` argument raised a TypeError), and no gradients are
# needed for prediction.
model.eval()
with torch.no_grad():
    y_pred = model(fTest)
print("predicted Y value: ", y_pred)

torch.save(model.state_dict(), 'stroke.pth')
|