From fae14d38c1371efe7602f04039ed1d7f0d3a111a Mon Sep 17 00:00:00 2001
From: s434732
Date: Wed, 9 Jun 2021 17:52:05 +0200
Subject: [PATCH] '.'

---
Review note: the blob ids on the "index" lines predate the review fixes made
to Dockerfile, JenkinsFileDvc, dvc.yaml, split_10.py and train_10.py; they are
informational only and were not recomputed.

 .dvc/config    |  4 +++
 Dockerfile     |  2 ++
 JenkinsFileDvc | 47 +++++++++++++++++++++++++++++++++
 dvc.yaml       | 20 ++++++++++++++
 split_10.py    | 28 +++++++++++++++++++++
 train_10.py    | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 186 insertions(+)
 create mode 100644 JenkinsFileDvc
 create mode 100644 dvc.yaml
 create mode 100644 split_10.py
 create mode 100644 train_10.py

diff --git a/.dvc/config b/.dvc/config
index e69de29..c02d6a2 100644
--- a/.dvc/config
+++ b/.dvc/config
@@ -0,0 +1,4 @@
+[core]
+    remote = ium_ssh_remote
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp
diff --git a/Dockerfile b/Dockerfile
index 013ba47..fdd1083 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,6 +8,8 @@ RUN pip3 install torch torchvision torchaudio
 RUN apt install -y curl
 RUN pip3 install --user wget
 RUN pip3 install sacred && pip3 install GitPython && pip3 install pymongo
+RUN pip3 install dvc
+RUN pip3 install dvc[ssh] paramiko
 
 WORKDIR /app
 
diff --git a/JenkinsFileDvc b/JenkinsFileDvc
new file mode 100644
index 0000000..5db166b
--- /dev/null
+++ b/JenkinsFileDvc
@@ -0,0 +1,47 @@
+// Jenkins pipeline: pull DVC-tracked data over SSH and re-run the DVC stages.
+pipeline {
+    agent {
+        dockerfile true
+    }
+    parameters {
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'WHICH_BUILD'
+        )
+        // NOTE(review): BATCH_SIZE and EPOCHS are declared but no step below passes them on.
+        string(
+            defaultValue: '10',
+            description: 'batch size',
+            name: 'BATCH_SIZE'
+        )
+        string(
+            defaultValue: '5',
+            description: 'epochs',
+            name: 'EPOCHS'
+        )
+    }
+    stages {
+        stage('dvc') {
+            steps {
+                withCredentials([sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: '')]) {
+                    copyArtifacts fingerprintArtifacts: true, projectName: 's434732-create-dataset', selector: buildParameter('WHICH_BUILD')
+                    sh 'ssh ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl -i $IUM_SFTP_KEY'
+                    sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
+                    sh 'dvc pull'
+                    // The DVC subcommand is `repro`; `reproduce` is not a valid command.
+                    sh 'dvc repro'
+                }
+            }
+        }
+    }
+    post {
+        success {
+            mail body: 'SUCCESS DVC', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+            archiveArtifacts 'accuracy.txt'
+        }
+        failure {
+            mail body: 'FAILURE DVC', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+        }
+    }
+}
diff --git a/dvc.yaml b/dvc.yaml
new file mode 100644
index 0000000..b128883
--- /dev/null
+++ b/dvc.yaml
@@ -0,0 +1,20 @@
+stages:
+  download_and_split:
+    cmd: python3 split_10.py
+    deps:
+      - heart_failure_clinical_records_dataset.csv
+      - split_10.py
+    outs:
+      - train.csv
+      - valid.csv
+      - test.csv
+  train_model:
+    cmd: python3 train_10.py
+    deps:
+      - train.csv
+      - valid.csv
+      - test.csv
+      # Track the script too, so editing it invalidates this stage.
+      - train_10.py
+    outs:
+      - accuracy.txt
diff --git a/split_10.py b/split_10.py
new file mode 100644
index 0000000..d28d79d
--- /dev/null
+++ b/split_10.py
@@ -0,0 +1,28 @@
+"""Clean, scale and split the heart-failure dataset into train/valid/test CSVs."""
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler
+import numpy as np
+
+results = pd.read_csv('heart_failure_clinical_records_dataset.csv')
+
+# Drop rows containing NaN. dropna() returns a new frame, so the result
+# has to be assigned back; the bare call was a no-op.
+results = results.dropna()
+
+results = results.astype({"age": np.int64})
+
+# Scale every float64 column into [0, 1].
+for col in results.columns:
+    if results[col].dtype == np.float64:
+        dataReshaped = results[col].values.reshape(-1, 1)
+        scaler = MinMaxScaler(feature_range=(0, 1))
+        results[col] = scaler.fit_transform(dataReshaped)
+
+# Split 6:2:2 - 40% is held out, then halved into validation and test.
+train, test = train_test_split(results, test_size=1 - 0.6)
+valid, test = train_test_split(test, test_size=0.5)
+
+train.to_csv("train.csv", index=False)
+valid.to_csv("valid.csv", index=False)
+test.to_csv("test.csv", index=False)
diff --git a/train_10.py b/train_10.py
new file mode 100644
index 0000000..d42ce3b
--- /dev/null
+++ b/train_10.py
@@ -0,0 +1,85 @@
+"""Train logistic regression on train.csv; write test accuracy to accuracy.txt."""
+import torch
+import sys
+from torch import nn
+import numpy as np
+import pandas as pd
+from sklearn.metrics import accuracy_score
+from sklearn.metrics import f1_score
+np.set_printoptions(suppress=False)
+
+FEATURES = ['age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
+            'ejection_fraction', 'high_blood_pressure', 'platelets',
+            'serum_creatinine', 'serum_sodium', 'sex', 'smoking']
+TARGET = 'DEATH_EVENT'
+
+
+class LogisticRegressionModel(nn.Module):
+    """Single linear layer followed by a sigmoid."""
+
+    def __init__(self, input_dim, output_dim):
+        super(LogisticRegressionModel, self).__init__()
+        self.linear = nn.Linear(input_dim, output_dim)
+        self.sigmoid = nn.Sigmoid()
+
+    def forward(self, x):
+        out = self.linear(x)
+        return self.sigmoid(out)
+
+
+train = pd.read_csv("train.csv")
+test = pd.read_csv("test.csv")
+# NOTE(review): valid.csv is loaded but not used further in this script.
+valid = pd.read_csv("valid.csv")
+
+xtrain = train[FEATURES].astype(np.float32)
+ytrain = train[TARGET].astype(np.float32)
+
+xtest = test[FEATURES].astype(np.float32)
+ytest = test[TARGET].astype(np.float32)
+
+xTrain = torch.from_numpy(xtrain.values)
+# reshape(-1, 1) adapts to the actual row count (was hard-coded to 179).
+yTrain = torch.from_numpy(ytrain.values.reshape(-1, 1))
+
+xTest = torch.from_numpy(xtest.values)
+yTest = torch.from_numpy(ytest.values)
+
+# NOTE(review): batch_size is parsed but the loop below trains on the full set.
+batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10
+num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
+learning_rate = 0.002
+input_dim = len(FEATURES)
+output_dim = 1
+
+model = LogisticRegressionModel(input_dim, output_dim)
+
+criterion = torch.nn.BCELoss(reduction='mean')
+optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
+
+for epoch in range(num_epochs):
+    model.train()
+    optimizer.zero_grad()
+    # Forward pass
+    y_pred = model(xTrain)
+    # Compute loss
+    loss = criterion(y_pred, yTrain)
+    # Backward pass
+    loss.backward()
+    optimizer.step()
+
+model.eval()
+with torch.no_grad():
+    probabilities = model(xTest).numpy()
+
+# The model emits one sigmoid probability per row, so argmax over axis 1 is
+# always 0. Threshold at 0.5 to get the predicted class instead.
+predicted = (probabilities >= 0.5).astype(np.float32).ravel()
+expected = yTest.numpy()
+
+accuracy_result = accuracy_score(expected, predicted)
+print("accuracy_score", accuracy_result)
+print("F1", f1_score(expected, predicted, average=None))
+
+with open("accuracy.txt", "w") as text_file:
+    text_file.write(f"accuracy: {accuracy_result}")