evaluation.py
All checks were successful
s434732-evaluation/pipeline/head This commit looks good
s434732-training/pipeline/head This commit looks good

This commit is contained in:
s434732 2021-05-15 15:54:08 +02:00
parent 877975b3c4
commit f00f35a936
6 changed files with 120 additions and 172 deletions

Binary file not shown.

View File

@ -1,4 +1,5 @@
import torch import torch
import sys
from torch import nn from torch import nn
import numpy as np import numpy as np
import pandas as pd import pandas as pd
@ -15,21 +16,21 @@ class LogisticRegressionModel(nn.Module):
return self.sigmoid(out) return self.sigmoid(out)
data_train = pd.read_csv("train.csv") train = pd.read_csv("train.csv")
data_test = pd.read_csv("test.csv") test = pd.read_csv("test.csv")
data_val = pd.read_csv("valid.csv") valid = pd.read_csv("valid.csv")
x_train = data_train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) xtrain = train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32)
y_train = data_train['DEATH_EVENT'].astype(np.float32) ytrain = train['DEATH_EVENT'].astype(np.float32)
x_test = data_test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) xtest = test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32)
y_test = data_test['DEATH_EVENT'].astype(np.float32) ytest = test['DEATH_EVENT'].astype(np.float32)
fTrain = torch.from_numpy(x_train.values) xTrain = torch.from_numpy(xtrain.values)
tTrain = torch.from_numpy(y_train.values.reshape(179,1)) yTrain = torch.from_numpy(ytrain.values.reshape(179,1))
fTest= torch.from_numpy(x_test.values) xTest = torch.from_numpy(xtest.values)
tTest = torch.from_numpy(y_test.values) yTest = torch.from_numpy(ytest.values)
batch_size = 10 batch_size = 10
num_epochs = 5 num_epochs = 5
@ -39,7 +40,7 @@ output_dim = 1
model = LogisticRegressionModel(input_dim, output_dim) model = LogisticRegressionModel(input_dim, output_dim)
criterion = torch.nn.BCELoss(reduction='mean') criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate) optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)
for epoch in range(num_epochs): for epoch in range(num_epochs):
@ -47,14 +48,14 @@ for epoch in range(num_epochs):
model.train() model.train()
optimizer.zero_grad() optimizer.zero_grad()
# Forward pass # Forward pass
y_pred = model(fTrain) y_pred = model(xTrain)
# Compute Loss # Compute Loss
loss = criterion(y_pred, tTrain) loss = criterion(y_pred, yTrain)
# print(loss.item()) # print(loss.item())
# Backward pass # Backward pass
loss.backward() loss.backward()
optimizer.step() optimizer.step()
y_pred = model(fTest) y_pred = model(xTest)
print(y_pred.data) print(y_pred.data)
torch.save(model.state_dict(), 'DEATH_EVENT.pth') torch.save(model.state_dict(), 'DEATH_EVENT.pth')

View File

@ -0,0 +1,45 @@
// Evaluation pipeline: pulls the dataset and the trained model from upstream
// jobs, runs evaluation.py, archives its output and mails the build outcome.
pipeline {
    // Build the agent container from the Dockerfile in the repository.
    agent {
        dockerfile true
    }

    parameters {
        // Build of the dataset job whose artifacts are copied in.
        buildSelector(
            name: 'WHICH_BUILD_DATA',
            description: 'Which build to use for copying artifacts',
            defaultSelector: lastSuccessful()
        )
        // Build of the training job whose artifacts are copied in.
        buildSelector(
            name: 'WHICH_BUILD_TRAIN',
            description: 'Which build to use for copying artifacts',
            defaultSelector: lastSuccessful()
        )
    }

    stages {
        stage('copyArtifacts') {
            steps {
                // Artifacts of the dataset-creation job.
                copyArtifacts(
                    fingerprintArtifacts: true,
                    projectName: 's434732-create-dataset',
                    selector: buildParameter('WHICH_BUILD_DATA')
                )
            }
        }

        stage('Run_script') {
            steps {
                // Artifacts of the training job are fetched right before the script runs.
                copyArtifacts(
                    fingerprintArtifacts: true,
                    projectName: 's434732-training/master',
                    selector: buildParameter('WHICH_BUILD_TRAIN')
                )
                // '>>' appends the script's stdout to result.txt instead of overwriting it.
                sh('python3 "./evaluation.py" >> result.txt')
            }
        }

        stage('archiveArtifacts') {
            steps {
                archiveArtifacts 'result.txt'
            }
        }
    }

    // Notify the same address about either outcome.
    post {
        success {
            mail(
                body: 'SUCCESS EVALUATION',
                subject: 's434732',
                to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
            )
        }
        failure {
            mail(
                body: 'FAILURE EVALUATION',
                subject: 's434732',
                to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
            )
        }
    }
}

View File

@ -21,12 +21,12 @@ pipeline {
) )
} }
stages { stages {
stage('checkout') { stage('copyArtifacts') {
steps { steps {
copyArtifacts fingerprintArtifacts: true, projectName: 's434732-create-dataset', selector: buildParameter('WHICH_BUILD') copyArtifacts fingerprintArtifacts: true, projectName: 's434732-create-dataset', selector: buildParameter('WHICH_BUILD')
} }
} }
stage('Docker'){ stage('Run_script'){
steps{ steps{
sh 'python3 "./training.py" ${BATCH_SIZE} ${EPOCHS} > model_pred.txt' sh 'python3 "./training.py" ${BATCH_SIZE} ${EPOCHS} > model_pred.txt'
} }
@ -40,6 +40,7 @@ pipeline {
} }
post { post {
success { success {
build job: 's434732-evaluation/master'
mail body: 'SUCCESS TRAINING', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms' mail body: 'SUCCESS TRAINING', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
} }

View File

@ -1,153 +1,54 @@
import sys
import torch import torch
import torch.nn as nn import sys
from torch import nn
import numpy as np
import pandas as pd import pandas as pd
import torch.nn.functional as F from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader, TensorDataset, random_split from sklearn.metrics import f1_score
from sklearn import preprocessing np.set_printoptions(suppress=False)
batch_size = 64 class LogisticRegressionModel(nn.Module):
def __init__(self, input_dim, output_dim):
train = pd.read_csv('train.csv') super(LogisticRegressionModel, self).__init__()
test = pd.read_csv('test.csv') self.linear = nn.Linear(input_dim, output_dim)
self.sigmoid = nn.Sigmoid()
categorical_cols = train.select_dtypes(include=object).columns.values def forward(self, x):
out = self.linear(x)
input_cols = train.columns.values[1:-1] return self.sigmoid(out)
output_cols = train.columns.values[-1:]
def dataframe_to_arrays(dataframe): train = pd.read_csv("train.csv")
# Make a copy of the original dataframe test = pd.read_csv("test.csv")
dataframe1 = dataframe.copy(deep=True) valid = pd.read_csv("valid.csv")
# Convert non-numeric categorical columns to numbers
for col in categorical_cols:
dataframe1[col] = dataframe1[col].astype('category').cat.codes
# Extract input & outupts as numpy arrays
min_max_scaler = preprocessing.MinMaxScaler() xtrain = train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32)
x_scaled = min_max_scaler.fit_transform(dataframe1) ytrain = train['DEATH_EVENT'].astype(np.float32)
dataframe1 = pd.DataFrame(x_scaled, columns = dataframe1.columns)
inputs_array = dataframe1[input_cols].to_numpy() xtest = test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32)
targets_array = dataframe1[output_cols].to_numpy() ytest = test['DEATH_EVENT'].astype(np.float32)
return inputs_array, targets_array
inputs_array_training, targets_array_training = dataframe_to_arrays(train) xTrain = torch.from_numpy(xtrain.values)
yTrain = torch.from_numpy(ytrain.values.reshape(179,1))
xTest = torch.from_numpy(xtest.values)
yTest = torch.from_numpy(ytest.values)
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10
num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
learning_rate = 0.002
input_dim = 11
output_dim = 1
model = LogisticRegressionModel(input_dim, output_dim)
model.load_state_dict(torch.load('DEATH_EVENT.pth'))
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)
inputs_array_testing, targets_array_testing = dataframe_to_arrays(test) prediction= model(xTest)
inputs_training = torch.from_numpy(inputs_array_training).type(torch.float32) accuracy_score = accuracy_score(yTest, np.argmax(prediction.detach().numpy(), axis=1))
targets_training = torch.from_numpy(targets_array_training).type(torch.float32) print("accuracy_score", accuracy_score)
print("F1", f1_score(yTest, np.argmax(prediction.detach().numpy(), axis=1), average=None))
inputs_testing = torch.from_numpy(inputs_array_testing).type(torch.float32)
targets_testing = torch.from_numpy(targets_array_testing).type(torch.float32)
train_dataset = TensorDataset(inputs_training, targets_training)
val_dataset = TensorDataset(inputs_testing, targets_testing)
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size*2)
input_size = len(input_cols)
output_size = len(output_cols)
class FootbalModel(nn.Module):
    """Single linear layer with helpers for MSE-based training and validation.

    The layer dimensions are read from the module-level ``input_size`` and
    ``output_size`` names, so both must be defined before the class is
    instantiated.
    """

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, xb):
        """Return the linear layer's output for the batch ``xb``."""
        out = self.linear(xb)
        return out

    def training_step(self, batch):
        """Return the MSE loss for one ``(inputs, targets)`` batch."""
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        # loss = F.l1_loss(out, targets)
        loss = F.mse_loss(out, targets)
        return loss

    def validation_step(self, batch):
        """Return ``{'val_loss': loss}`` for one batch, detached from the graph."""
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        # loss = F.l1_loss(out, targets)
        loss = F.mse_loss(out, targets)
        return {'val_loss': loss.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch ``'val_loss'`` tensors into one Python float.

        Args:
            outputs: Iterable of dicts as returned by ``validation_step``.

        Returns:
            ``{'val_loss': mean_loss}`` where ``mean_loss`` is a float.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        return {'val_loss': epoch_loss.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation loss on every 20th epoch and on the last one.

        Args:
            epoch: Zero-based index of the epoch that just finished.
            result: Dict containing a ``'val_loss'`` number.
            num_epochs: Total number of epochs in the run.
        """
        # Print result every 20th epoch
        if (epoch + 1) % 20 == 0 or epoch == num_epochs - 1:
            print("Epoch [{}], val_loss: {:.4f}".format(epoch + 1, result['val_loss']))
model = FootbalModel()
model.load_state_dict(torch.load('FootballModel.pth'))
list(model.parameters())
# def evaluate(model, val_loader):
# outputs = [model.validation_step(batch) for batch in val_loader]
# return model.validation_epoch_end(outputs)
#
# def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
# history = []
# optimizer = opt_func(model.parameters(), lr)
# for epoch in range(epochs):
# # Training Phase
# for batch in train_loader:
# loss = model.training_step(batch)
# loss.backward()
# optimizer.step()
# optimizer.zero_grad()
# # Validation phase
# result = evaluate(model, val_loader)
# model.epoch_end(epoch, result, epochs)
# history.append(result)
# return history
#
#
# result = evaluate(model, val_loader) # Use the the evaluate function
#
# # epochs = 100
# lr = 1e-6
# history3 = fit(epochs, lr, model, train_loader, val_loader)
#
def predict_single(input, target, model):
    """Print the model's prediction for one sample and its 0.5-threshold label.

    Args:
        input: Feature tensor for a single sample, without a batch dimension.
        target: Expected value for the sample; accepted but not used here.
        model: Callable that maps a batched tensor to a batch of predictions.
    """
    # Add the batch dimension and pass the *batched* tensor to the model.
    # The original built `inputs` but then called the model with the raw
    # sample, leaving `inputs` unused.
    inputs = input.unsqueeze(0)
    predictions = model(inputs)
    print(type(predictions))
    # First (and only) row of the batch, detached from the autograd graph.
    prediction = predictions[0].detach()
    print(prediction)
    print("Prediction:", prediction)
    if prediction >= 0.5:
        print('Neutral')
    else:
        print('not neutral')
# inputs_testing = torch.from_numpy(inputs_array_testing).type(torch.float32)
# targets_testing = torch.from_numpy(targets_array_testing).type(torch.float32)
# inputs = input.unsqueeze(0)
# predictions = model(targets_testing)
# Print a prediction for every (features, target) pair in the validation dataset.
for i in range(len(val_dataset)):
    # Named `sample` rather than `input` so the builtin input() is not
    # shadowed at module level.
    sample, target = val_dataset[i]
    predict_single(sample, target, model)
# torch.save(model.state_dict(), 'FootballModel.pth')

View File

@ -16,25 +16,25 @@ class LogisticRegressionModel(nn.Module):
return self.sigmoid(out) return self.sigmoid(out)
data_train = pd.read_csv("train.csv") train = pd.read_csv("train.csv")
data_test = pd.read_csv("test.csv") test = pd.read_csv("test.csv")
data_val = pd.read_csv("valid.csv") valid = pd.read_csv("valid.csv")
x_train = data_train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) xtrain = train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32)
y_train = data_train['DEATH_EVENT'].astype(np.float32) ytrain = train['DEATH_EVENT'].astype(np.float32)
x_test = data_test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) xtest = test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32)
y_test = data_test['DEATH_EVENT'].astype(np.float32) ytest = test['DEATH_EVENT'].astype(np.float32)
fTrain = torch.from_numpy(x_train.values) xTrain = torch.from_numpy(xtrain.values)
tTrain = torch.from_numpy(y_train.values.reshape(179,1)) yTrain = torch.from_numpy(ytrain.values.reshape(179,1))
fTest= torch.from_numpy(x_test.values) xTest = torch.from_numpy(xtest.values)
tTest = torch.from_numpy(y_test.values) yTest = torch.from_numpy(ytest.values)
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10 batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10
num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5 num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
learning_rate = 0.001 learning_rate = 0.002
input_dim = 11 input_dim = 11
output_dim = 1 output_dim = 1
@ -48,14 +48,14 @@ for epoch in range(num_epochs):
model.train() model.train()
optimizer.zero_grad() optimizer.zero_grad()
# Forward pass # Forward pass
y_pred = model(fTrain) y_pred = model(xTrain)
# Compute Loss # Compute Loss
loss = criterion(y_pred, tTrain) loss = criterion(y_pred, yTrain)
# print(loss.item()) # print(loss.item())
# Backward pass # Backward pass
loss.backward() loss.backward()
optimizer.step() optimizer.step()
y_pred = model(fTest) y_pred = model(xTest)
print(y_pred.data) print(y_pred.data)
torch.save(model.state_dict(), 'DEATH_EVENT.pth') torch.save(model.state_dict(), 'DEATH_EVENT.pth')