training.py
All checks were successful
s434732-training/pipeline/head This commit looks good

This commit is contained in:
s434732 2021-05-15 15:34:10 +02:00
parent d2d986c556
commit 877975b3c4
4 changed files with 52 additions and 138 deletions

Binary file not shown.

View File

@ -1,5 +1,4 @@
import torch
import sys
from torch import nn
import numpy as np
import pandas as pd
@ -32,9 +31,9 @@ tTrain = torch.from_numpy(y_train.values.reshape(179,1))
fTest= torch.from_numpy(x_test.values)
tTest = torch.from_numpy(y_test.values)
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10
num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
learning_rate = 0.001
batch_size = 10
num_epochs = 5
learning_rate = 0.002
input_dim = 11
output_dim = 1

View File

@ -9,12 +9,12 @@ pipeline {
name: 'WHICH_BUILD'
)
string(
defaultValue: '64',
defaultValue: '10',
description: 'batch size',
name: 'BATCH_SIZE'
)
string(
defaultValue: '100',
defaultValue: '5',
description: 'epochs',
name: 'EPOCHS'
@ -34,7 +34,7 @@ pipeline {
stage('archiveArtifacts') {
steps{
archiveArtifacts 'model_pred.txt'
archiveArtifacts 'FootballModel.pth'
archiveArtifacts 'DEATH_EVENT.pth'
}
}
}

View File

@ -1,146 +1,61 @@
import sys
import torch
import torch.nn as nn
import sys
from torch import nn
import numpy as np
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn import preprocessing
# suppress=False leaves NumPy free to print small floats in scientific notation.
np.set_printoptions(suppress=False)
# Optional command-line overrides: argv[1] is the batch size (default 64),
# argv[2] is the number of epochs (default 100).
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 64
epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 100
# Both splits are read from CSV files addressed by relative path.
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
# Names of the object-dtype columns in the training frame.
categorical_cols = train.select_dtypes(include=object).columns.values
# Inputs are every column except the first and the last; the last column is
# the single output column.
input_cols = train.columns.values[1:-1]
output_cols = train.columns.values[-1:]
class LogisticRegressionModel(nn.Module):
    """Logistic regression: one linear layer followed by a sigmoid.

    Args:
        input_dim: Number of features in each input row.
        output_dim: Number of values produced for each input row.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))`` for the batch ``x``."""
        return self.sigmoid(self.linear(x))
# NOTE(review): this span comes from a rendered diff whose +/- markers and
# indentation were lost. The statements that use `dataframe` / `dataframe1`
# appear to form the body of dataframe_to_arrays; the `data_*`, `x_train` and
# `y_train` statements appear to belong to the replacement script - confirm
# against the repository before relying on this layout.
#
# dataframe_to_arrays(dataframe): encode the categorical columns, min-max scale
# every column, and return (inputs_array, targets_array) as NumPy arrays.
def dataframe_to_arrays(dataframe):
# Work on a deep copy so the frame passed in is not modified
dataframe1 = dataframe.copy(deep=True)
# Convert non-numeric categorical columns to their integer category codes
for col in categorical_cols:
dataframe1[col] = dataframe1[col].astype('category').cat.codes
# Extract inputs & outputs as numpy arrays
# Replacement-script lines: load the train, test and validation CSV files.
data_train = pd.read_csv("train.csv")
data_test = pd.read_csv("test.csv")
data_val = pd.read_csv("valid.csv")
# The scaler is fitted on whichever frame was passed in, so separate calls
# (e.g. for train and for test) are each scaled by their own min/max.
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(dataframe1)
# fit_transform returns a plain array; rebuild a frame with the original column names.
dataframe1 = pd.DataFrame(x_scaled, columns = dataframe1.columns)
# Replacement-script lines: eleven feature columns and the DEATH_EVENT label,
# both cast to float32.
x_train = data_train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32)
y_train = data_train['DEATH_EVENT'].astype(np.float32)
# Select the module-level input/output columns from the scaled frame.
inputs_array = dataframe1[input_cols].to_numpy()
targets_array = dataframe1[output_cols].to_numpy()
return inputs_array, targets_array
x_test = data_test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32)
y_test = data_test['DEATH_EVENT'].astype(np.float32)
inputs_array_training, targets_array_training = dataframe_to_arrays(train)
# Wrap the float32 feature and label arrays as tensors.
fTrain = torch.from_numpy(x_train.values)
# Training labels become a column vector so their shape matches the model's
# one-value-per-row output. reshape(-1, 1) takes the row count from the data
# instead of assuming the training split has exactly 179 rows.
tTrain = torch.from_numpy(y_train.values.reshape(-1, 1))
fTest = torch.from_numpy(x_test.values)
tTest = torch.from_numpy(y_test.values)
inputs_array_testing, targets_array_testing = dataframe_to_arrays(test)
# NOTE(review): lines from the removed script (the `inputs_*` / `targets_*`
# conversions) are interleaved here with the replacement script's setup because
# the diff markers were lost - confirm against the repository.
#
# Optional command-line overrides: argv[1] is the batch size (default 10),
# argv[2] is the number of epochs (default 5).
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10
num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
learning_rate = 0.001
# Eleven inputs, matching the eleven feature columns selected for x_train;
# one output per row.
input_dim = 11
output_dim = 1
model = LogisticRegressionModel(input_dim, output_dim)
# Removed-script lines: training arrays converted to float32 tensors.
inputs_training = torch.from_numpy(inputs_array_training).type(torch.float32)
targets_training = torch.from_numpy(targets_array_training).type(torch.float32)
# Binary cross-entropy averaged over all elements, optimised with plain SGD.
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)
# Removed-script lines: testing arrays converted to float32 tensors.
inputs_testing = torch.from_numpy(inputs_array_testing).type(torch.float32)
targets_testing = torch.from_numpy(targets_array_testing).type(torch.float32)
# Full-batch training: every epoch makes exactly one optimizer update, computed
# from the whole training tensor.
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()  # drop gradients left over from the previous epoch
    loss = criterion(model(fTrain), tTrain)
    loss.backward()
    optimizer.step()

# Run the trained model on the test features and print its raw outputs.
y_pred = model(fTest)
print(y_pred.data)
# Pair each input tensor with its target tensor. Note that the "validation"
# dataset is built from the testing tensors.
train_dataset = TensorDataset(inputs_training, targets_training)
val_dataset = TensorDataset(inputs_testing, targets_testing)
# Training batches are reshuffled every epoch; validation uses batches twice
# as large and no shuffle argument.
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size*2)
# Layer dimensions are taken from the number of selected input/output columns.
input_size = len(input_cols)
output_size = len(output_cols)
class FootbalModel(nn.Module):
    """Linear model with mean-squared-error training and validation helpers.

    The layer dimensions are read from the module-level ``input_size`` and
    ``output_size`` values when an instance is created.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Apply the linear layer to the batch ``xb``."""
        return self.linear(xb)

    def training_step(self, batch):
        """Return the MSE loss for one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        return F.mse_loss(self(inputs), targets)

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one ``(inputs, targets)`` batch.

        The loss is computed with gradient tracking disabled, so the returned
        tensor carries no autograd history.
        """
        inputs, targets = batch
        with torch.no_grad():
            loss = F.mse_loss(self(inputs), targets)
        return {'val_loss': loss}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation losses into one float.

        Args:
            outputs: Non-empty list of dicts as returned by ``validation_step``.

        Returns:
            ``{'val_loss': mean}`` where ``mean`` is the unweighted mean of the
            per-batch losses, as a Python float.
        """
        batch_losses = [step['val_loss'] for step in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss on every 20th epoch and on the last one.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            result: Dict holding the epoch's ``'val_loss'``.
            num_epochs: Total number of epochs in the run.
        """
        if (epoch + 1) % 20 == 0 or epoch == num_epochs - 1:
            print(f"Epoch [{epoch + 1}], val_loss: {result['val_loss']:.4f}")
model = FootbalModel()
list(model.parameters())
def evaluate(model, val_loader):
    """Run the model's validation step on every batch and aggregate the results.

    Args:
        model: Object providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable yielding validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the collected
        per-batch outputs.
    """
    # Validation never back-propagates, so skip autograd bookkeeping entirely.
    with torch.no_grad():
        outputs = [model.validation_step(batch) for batch in val_loader]
    return model.validation_epoch_end(outputs)
def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs and collect validation results.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer constructor.
        model: Object providing ``parameters()``, ``training_step(batch)`` and
            ``epoch_end(epoch, result, num_epochs)``.
        train_loader: Iterable yielding training batches.
        val_loader: Iterable yielding validation batches.
        opt_func: Optimizer constructor called as ``opt_func(params, lr)``.

    Returns:
        List with one validation result per epoch, in epoch order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch_index in range(epochs):
        # Training phase: one optimizer update per batch.
        for batch in train_loader:
            model.training_step(batch).backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase: evaluate, report, record.
        metrics = evaluate(model, val_loader)
        model.epoch_end(epoch_index, metrics, epochs)
        history.append(metrics)
    return history
result = evaluate(model, val_loader)  # validation result before fit() has run
# The epoch count is the command-line value parsed near the top of the script;
# only the learning rate is fixed here.
lr = 1e-6
history3 = fit(epochs, lr, model, train_loader, val_loader)
def predict_single(input, target, model):
    """Print the model's prediction for one sample and a 0.5-threshold label.

    Args:
        input: Feature tensor for a single sample, without a batch dimension.
        target: Expected value for the sample; accepted for call compatibility
            but not used.
        model: Callable mapping a batch of inputs to a batch of predictions.
    """
    # Add a leading batch dimension so the model receives a batch of one,
    # then take that batch's only row as the prediction.
    batch = input.unsqueeze(0)
    prediction = model(batch)[0].detach()
    print("Prediction:", prediction)
    if prediction >= 0.5:
        print('Neutral')
    else:
        print('not neutral')
# Print a prediction for every (input, target) pair in val_dataset, in index order.
for i in range(len(val_dataset)):
input, target = val_dataset[i]
predict_single(input, target, model)
torch.save(model.state_dict(), 'FootballModel.pth')
torch.save(model.state_dict(), 'DEATH_EVENT.pth')