From f00f35a936f89897c603bcc0643c6ed82c5fafcc Mon Sep 17 00:00:00 2001 From: s434732 Date: Sat, 15 May 2021 15:54:08 +0200 Subject: [PATCH] evaluation.py --- DEATH_EVENT.pth | Bin 1151 -> 1151 bytes IUM_05.py | 31 +++---- Jenkinsfile_evaluation | 45 ++++++++++ Jenkinsfile_train | 5 +- evaluation.py | 181 ++++++++++------------------------------- training.py | 30 +++---- 6 files changed, 120 insertions(+), 172 deletions(-) diff --git a/DEATH_EVENT.pth b/DEATH_EVENT.pth index 6f723fae186c2e6fdd4dd400cccc2ffeecc7ddfe..98e7250733c7376f109ac6946d08f5adde4f0ed7 100644 GIT binary patch delta 243 zcmey*@tzOIp;QMuZubRz5{;!He1eE?lZ9I+2@ct#rg!ZpIv}AJ4f=L z@@qN_3=BGxCo;)S4r3Bhftd)h%gu>{Aqoma)=mKH2y@#%e{v7gG>9A2n5|eqW=u|D i)=+?H1vxvwn~@1@Yg@ytf$EnB%<}=EPfEZ5 delta 243 zcmey*@tYM$Lpomjg7Z+4C! zQr4w93=9l9lP5CCP7Y%dQh}NXw#&_lgCPnE+8^^+fppwld40m<9;Rs!H>fdNv4G5& joWiW3fb5t6Z$>7dxeOc}Ad-OrtZfam2C82kFwX}7A-qg7 diff --git a/IUM_05.py b/IUM_05.py index b633816..3032115 100644 --- a/IUM_05.py +++ b/IUM_05.py @@ -1,4 +1,5 @@ import torch +import sys from torch import nn import numpy as np import pandas as pd @@ -15,21 +16,21 @@ class LogisticRegressionModel(nn.Module): return self.sigmoid(out) -data_train = pd.read_csv("train.csv") -data_test = pd.read_csv("test.csv") -data_val = pd.read_csv("valid.csv") +train = pd.read_csv("train.csv") +test = pd.read_csv("test.csv") +valid = pd.read_csv("valid.csv") -x_train = data_train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) -y_train = data_train['DEATH_EVENT'].astype(np.float32) +xtrain = train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) +ytrain = train['DEATH_EVENT'].astype(np.float32) -x_test = data_test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) -y_test = data_test['DEATH_EVENT'].astype(np.float32) +xtest = test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) +ytest = test['DEATH_EVENT'].astype(np.float32) -fTrain = torch.from_numpy(x_train.values) -tTrain = torch.from_numpy(y_train.values.reshape(179,1)) +xTrain = torch.from_numpy(xtrain.values) +yTrain = torch.from_numpy(ytrain.values.reshape(179,1)) -fTest= torch.from_numpy(x_test.values) -tTest = torch.from_numpy(y_test.values) +xTest = torch.from_numpy(xtest.values) +yTest = torch.from_numpy(ytest.values) batch_size = 10 num_epochs = 5 @@ -39,7 +40,7 @@ output_dim = 1 model = LogisticRegressionModel(input_dim, output_dim) -criterion = torch.nn.BCELoss(reduction='mean') +criterion = torch.nn.BCELoss(reduction='mean') optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate) for epoch in range(num_epochs): @@ -47,14 +48,14 @@ for epoch in range(num_epochs): model.train() optimizer.zero_grad() # Forward pass - y_pred = model(fTrain) + y_pred = model(xTrain) # Compute Loss - loss = criterion(y_pred, tTrain) + loss = criterion(y_pred, yTrain) # print(loss.item()) # Backward pass loss.backward() optimizer.step() -y_pred = model(fTest) +y_pred = model(xTest) print(y_pred.data) torch.save(model.state_dict(), 'DEATH_EVENT.pth') diff --git a/Jenkinsfile_evaluation b/Jenkinsfile_evaluation index e69de29..1385ed8 100644 --- a/Jenkinsfile_evaluation +++ b/Jenkinsfile_evaluation @@ -0,0 +1,45 @@ +pipeline { + agent { + dockerfile true + } + parameters{ + buildSelector( + defaultSelector: lastSuccessful(), + description: 'Which build to use for copying artifacts', + name: 'WHICH_BUILD_DATA' + ) + buildSelector( + defaultSelector: lastSuccessful(), + description: 'Which build to use for copying artifacts', + name: 'WHICH_BUILD_TRAIN' + ) + } + stages { + stage('copyArtifacts') { + steps { + copyArtifacts fingerprintArtifacts: true, projectName: 's434732-create-dataset', selector: buildParameter('WHICH_BUILD_DATA') + } + } + stage('Run_script'){ + steps{ + copyArtifacts fingerprintArtifacts: true, projectName: 's434732-training/master', selector: buildParameter('WHICH_BUILD_TRAIN') + sh 'python3 "./evaluation.py" >> result.txt' + } + } + stage('archiveArtifacts') { + steps { + archiveArtifacts 'result.txt' + } + } + } + post { + success { + mail body: 'SUCCESS EVALUATION', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms' + } + + failure { + mail body: 'FAILURE EVALUATION', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms' + } + + } +} \ No newline at end of file diff --git a/Jenkinsfile_train b/Jenkinsfile_train index b559ad4..de54f6b 100644 --- a/Jenkinsfile_train +++ b/Jenkinsfile_train @@ -21,12 +21,12 @@ pipeline { ) } stages { - stage('checkout') { + stage('copyArtifacts') { steps { copyArtifacts fingerprintArtifacts: true, projectName: 's434732-create-dataset', selector: buildParameter('WHICH_BUILD') } } - stage('Docker'){ + stage('Run_script'){ steps{ sh 'python3 "./training.py" ${BATCH_SIZE} ${EPOCHS} > model_pred.txt' } @@ -40,6 +40,7 @@ pipeline { } post { success { + build job: 's434732-evaluation/master' mail body: 'SUCCESS TRAINING', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms' } diff --git a/evaluation.py b/evaluation.py index 1ffebd0..65c229a 100644 --- a/evaluation.py +++ b/evaluation.py @@ -1,153 +1,54 @@ -import sys import torch -import torch.nn as nn +import sys +from torch import nn +import numpy as np import pandas as pd -import torch.nn.functional as F -from torch.utils.data import DataLoader, TensorDataset, random_split -from sklearn import preprocessing +from sklearn.metrics import accuracy_score +from sklearn.metrics import f1_score +np.set_printoptions(suppress=False) -batch_size = 64 - -train = pd.read_csv('train.csv') -test = pd.read_csv('test.csv') - -categorical_cols = train.select_dtypes(include=object).columns.values - -input_cols = train.columns.values[1:-1] -output_cols = train.columns.values[-1:] +class LogisticRegressionModel(nn.Module): + def __init__(self, input_dim, output_dim): + super(LogisticRegressionModel, self).__init__() + self.linear = nn.Linear(input_dim, output_dim) + self.sigmoid = nn.Sigmoid() + def forward(self, x): + out = self.linear(x) + return self.sigmoid(out) -def dataframe_to_arrays(dataframe): - # Make a copy of the original dataframe - dataframe1 = dataframe.copy(deep=True) - # Convert non-numeric categorical columns to numbers - for col in categorical_cols: - dataframe1[col] = dataframe1[col].astype('category').cat.codes - # Extract input & outupts as numpy arrays +train = pd.read_csv("train.csv") +test = pd.read_csv("test.csv") +valid = pd.read_csv("valid.csv") - min_max_scaler = preprocessing.MinMaxScaler() - x_scaled = min_max_scaler.fit_transform(dataframe1) - dataframe1 = pd.DataFrame(x_scaled, columns = dataframe1.columns) +xtrain = train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) +ytrain = train['DEATH_EVENT'].astype(np.float32) - inputs_array = dataframe1[input_cols].to_numpy() - targets_array = dataframe1[output_cols].to_numpy() - return inputs_array, targets_array +xtest = test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) +ytest = test['DEATH_EVENT'].astype(np.float32) -inputs_array_training, targets_array_training = dataframe_to_arrays(train) +xTrain = torch.from_numpy(xtrain.values) +yTrain = torch.from_numpy(ytrain.values.reshape(179,1)) + +xTest = torch.from_numpy(xtest.values) +yTest = torch.from_numpy(ytest.values) + +batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10 +num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5 +learning_rate = 0.002 +input_dim = 11 +output_dim = 1 + +model = LogisticRegressionModel(input_dim, output_dim) +model.load_state_dict(torch.load('DEATH_EVENT.pth')) +criterion = torch.nn.BCELoss(reduction='mean') +optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate) -inputs_array_testing, targets_array_testing = dataframe_to_arrays(test) +prediction= model(xTest) -inputs_training = torch.from_numpy(inputs_array_training).type(torch.float32) -targets_training = torch.from_numpy(targets_array_training).type(torch.float32) - -inputs_testing = torch.from_numpy(inputs_array_testing).type(torch.float32) -targets_testing = torch.from_numpy(targets_array_testing).type(torch.float32) - -train_dataset = TensorDataset(inputs_training, targets_training) -val_dataset = TensorDataset(inputs_testing, targets_testing) - -train_loader = DataLoader(train_dataset, batch_size, shuffle=True) -val_loader = DataLoader(val_dataset, batch_size*2) - -input_size = len(input_cols) -output_size = len(output_cols) - - - -class FootbalModel(nn.Module): - def __init__(self): - super().__init__() - self.linear = nn.Linear(input_size, output_size) - - def forward(self, xb): - out = self.linear(xb) - return out - - def training_step(self, batch): - inputs, targets = batch - # Generate predictions - out = self(inputs) - # Calcuate loss - # loss = F.l1_loss(out, targets) - loss = F.mse_loss(out, targets) - return loss - - def validation_step(self, batch): - inputs, targets = batch - # Generate predictions - out = self(inputs) - # Calculate loss - # loss = F.l1_loss(out, targets) - loss = F.mse_loss(out, targets) - return {'val_loss': loss.detach()} - - def validation_epoch_end(self, outputs): - batch_losses = [x['val_loss'] for x in outputs] - epoch_loss = torch.stack(batch_losses).mean() - return {'val_loss': epoch_loss.item()} - - def epoch_end(self, epoch, result, num_epochs): - # Print result every 20th epoch - if (epoch + 1) % 20 == 0 or epoch == num_epochs - 1: - print("Epoch [{}], val_loss: {:.4f}".format(epoch + 1, result['val_loss'])) - -model = FootbalModel() -model.load_state_dict(torch.load('FootballModel.pth')) -list(model.parameters()) - - -# def evaluate(model, val_loader): -# outputs = [model.validation_step(batch) for batch in val_loader] -# return model.validation_epoch_end(outputs) -# -# def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD): -# history = [] -# optimizer = opt_func(model.parameters(), lr) -# for epoch in range(epochs): -# # Training Phase -# for batch in train_loader: -# loss = model.training_step(batch) -# loss.backward() -# optimizer.step() -# optimizer.zero_grad() -# # Validation phase -# result = evaluate(model, val_loader) -# model.epoch_end(epoch, result, epochs) -# history.append(result) -# return history -# -# -# result = evaluate(model, val_loader) # Use the the evaluate function -# -# # epochs = 100 -# lr = 1e-6 -# history3 = fit(epochs, lr, model, train_loader, val_loader) -# -def predict_single(input, target, model): - inputs = input.unsqueeze(0) - predictions = model(input) - print(type(predictions))# fill this - prediction = predictions[0].detach() - print(prediction) - print("Prediction:", prediction) - if prediction >= 0.5: - print('Neutral') - else: - print('not neutral') - -# inputs_testing = torch.from_numpy(inputs_array_testing).type(torch.float32) -# targets_testing = torch.from_numpy(targets_array_testing).type(torch.float32) - -# inputs = input.unsqueeze(0) -# predictions = model(targets_testing) - - -for i in range(len(val_dataset)): - input, target = val_dataset[i] - predict_single(input, target, model) - - -# torch.save(model.state_dict(), 'FootballModel.pth') \ No newline at end of file +accuracy_score = accuracy_score(yTest, np.argmax(prediction.detach().numpy(), axis=1)) +print("accuracy_score", accuracy_score) +print("F1", f1_score(yTest, np.argmax(prediction.detach().numpy(), axis=1), average=None)) \ No newline at end of file diff --git a/training.py b/training.py index 36d0c00..6d168e6 100644 --- a/training.py +++ b/training.py @@ -16,25 +16,25 @@ class LogisticRegressionModel(nn.Module): return self.sigmoid(out) -data_train = pd.read_csv("train.csv") -data_test = pd.read_csv("test.csv") -data_val = pd.read_csv("valid.csv") +train = pd.read_csv("train.csv") +test = pd.read_csv("test.csv") +valid = pd.read_csv("valid.csv") -x_train = data_train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) -y_train = data_train['DEATH_EVENT'].astype(np.float32) +xtrain = train[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) +ytrain = train['DEATH_EVENT'].astype(np.float32) -x_test = data_test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) -y_test = data_test['DEATH_EVENT'].astype(np.float32) +xtest = test[['age','anaemia','creatinine_phosphokinase','diabetes', 'ejection_fraction', 'high_blood_pressure', 'platelets', 'serum_creatinine', 'serum_sodium', 'sex', 'smoking']].astype(np.float32) +ytest = test['DEATH_EVENT'].astype(np.float32) -fTrain = torch.from_numpy(x_train.values) -tTrain = torch.from_numpy(y_train.values.reshape(179,1)) +xTrain = torch.from_numpy(xtrain.values) +yTrain = torch.from_numpy(ytrain.values.reshape(179,1)) -fTest= torch.from_numpy(x_test.values) -tTest = torch.from_numpy(y_test.values) +xTest = torch.from_numpy(xtest.values) +yTest = torch.from_numpy(ytest.values) batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10 num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5 -learning_rate = 0.001 +learning_rate = 0.002 input_dim = 11 output_dim = 1 @@ -48,14 +48,14 @@ for epoch in range(num_epochs): model.train() optimizer.zero_grad() # Forward pass - y_pred = model(fTrain) + y_pred = model(xTrain) # Compute Loss - loss = criterion(y_pred, tTrain) + loss = criterion(y_pred, yTrain) # print(loss.item()) # Backward pass loss.backward() optimizer.step() -y_pred = model(fTest) +y_pred = model(xTest) print(y_pred.data) torch.save(model.state_dict(), 'DEATH_EVENT.pth')