This commit is contained in:
s434732 2021-06-09 17:52:05 +02:00
parent 9b5a1732b6
commit fae14d38c1
6 changed files with 170 additions and 0 deletions

4
.dvc/config Normal file
View File

@ -0,0 +1,4 @@
[core]
    remote = ium_ssh_remote
['remote "ium_ssh_remote"']
    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp
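This config makes ium_ssh_remote the default DVC remote. A minimal sketch (assumed workflow, not part of this commit) of how such a config is generated, and how the private key is attached per checkout so it never lands in git:

import subprocess

# 'dvc remote add -d' writes the [core] and ['remote "ium_ssh_remote"'] sections above
subprocess.run(["dvc", "remote", "add", "-d", "ium_ssh_remote",
                "ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp"], check=True)

# '--local' stores the keyfile in .dvc/config.local, which DVC gitignores,
# so the credential stays out of the repository (the Jenkinsfile below does this)
subprocess.run(["dvc", "remote", "modify", "--local", "ium_ssh_remote",
                "keyfile", "/path/to/ium_sftp_key"], check=True)  # hypothetical key path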

Dockerfile
View File

@ -8,6 +8,8 @@ RUN pip3 install torch torchvision torchaudio
RUN apt install -y curl
RUN pip3 install --user wget
RUN pip3 install sacred && pip3 install GitPython && pip3 install pymongo
RUN pip3 install dvc
RUN pip3 install dvc[ssh] paramiko
WORKDIR /app
@ -18,6 +20,7 @@ COPY ./IUM_05.py ./
COPY ./training.py ./
COPY ./mongoObserver.py ./
COPY ./fileObserver.py ./
RUN mkdir /.kaggle
RUN chmod -R 777 /.kaggle

49
JenkinsFileDvc Normal file
View File

@ -0,0 +1,49 @@
pipeline {
    agent {
        dockerfile true
    }
    parameters {
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'WHICH_BUILD'
        )
        string(
            defaultValue: '10',
            description: 'batch size',
            name: 'BATCH_SIZE'
        )
        string(
            defaultValue: '5',
            description: 'epochs',
            name: 'EPOCHS'
        )
    }
    stages {
        stage('dvc') {
            steps {
                withCredentials([sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: '')]) {
                    copyArtifacts fingerprintArtifacts: true, projectName: 's434732-create-dataset', selector: buildParameter('WHICH_BUILD')
                    sh 'ssh ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl -i $IUM_SFTP_KEY'
                    sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
                    sh 'dvc pull'
                    sh 'dvc repro'
                }
            }
        }
    }
    post {
        success {
            mail body: 'SUCCESS DVC', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
            archiveArtifacts 'accuracy.txt'
        }
        failure {
            mail body: 'FAILURE DVC', subject: 's434732', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }
    }
}
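Note that BATCH_SIZE and EPOCHS are declared as pipeline parameters but never forwarded: dvc.yaml (below) runs python3 train_10.py without arguments, so the script falls back to its defaults. Jenkins does expose string parameters as environment variables inside sh steps, so one way the script could pick them up is sketched here (an assumption, not what the commit does):

import os

# Jenkins string parameters are visible as environment variables in sh steps,
# even when `dvc repro` invokes the script without CLI arguments
batch_size = int(os.environ.get("BATCH_SIZE", "10"))
num_epochs = int(os.environ.get("EPOCHS", "5"))
print(f"training with batch_size={batch_size}, epochs={num_epochs}")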

18
dvc.yaml Normal file
View File

@ -0,0 +1,18 @@
stages:
  download_and_split:
    cmd: python3 split_10.py
    deps:
      - heart_failure_clinical_records_dataset.csv
      - split_10.py
    outs:
      - train.csv
      - valid.csv
      - test.csv
  train_model:
    cmd: python3 train_10.py
    deps:
      - train.csv
      - valid.csv
      - test.csv
    outs:
      - accuracy.txt
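dvc repro orders these stages by their deps/outs: train_model consumes the train.csv, valid.csv, and test.csv produced by download_and_split, so the split always runs first, and stages whose inputs are unchanged are skipped via DVC's checksum cache. A minimal sketch (an assumption, mirroring what the Jenkins stage does) of driving the pipeline locally:

import subprocess

# Runs both stages in dependency order; stages with unchanged deps are skipped
subprocess.run(["dvc", "repro"], check=True)

# train_model writes its metric here (see the outs in dvc.yaml)
with open("accuracy.txt") as f:
    print(f.read())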

28
split_10.py Normal file
View File

@ -0,0 +1,28 @@
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

results = pd.read_csv('heart_failure_clinical_records_dataset.csv')

# Drop rows containing NaN values (dropna returns a copy, so reassign)
results = results.dropna()
results = results.astype({"age": np.int64})

# Rescale float columns to values in [0, 1]
for col in results.columns:
    if results[col].dtype == np.float64:
        dataReshaped = results[col].values.reshape(-1, 1)
        scaler = MinMaxScaler(feature_range=(0, 1))
        results[col] = scaler.fit_transform(dataReshaped)

# Split the dataset 60/20/20 into train/valid/test
train, test = train_test_split(results, test_size=0.4)
valid, test = train_test_split(test, test_size=0.5)
train.to_csv("train.csv", index=False)
valid.to_csv("valid.csv", index=False)
test.to_csv("test.csv", index=False)
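A small worked example (illustration, not part of the commit) of what the MinMaxScaler loop does to each float column, i.e. x' = (x - min) / (max - min):

import numpy as np
from sklearn.preprocessing import MinMaxScaler

col = np.array([[1.0], [2.0], [5.0]])
print(MinMaxScaler(feature_range=(0, 1)).fit_transform(col).ravel())
# [0.   0.25 1.  ]  ->  (1-1)/4, (2-1)/4, (5-1)/4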

68
train_10.py Normal file
View File

@ -0,0 +1,68 @@
import sys

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import accuracy_score, f1_score
from torch import nn

np.set_printoptions(suppress=False)


class LogisticRegressionModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegressionModel, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        out = self.linear(x)
        return self.sigmoid(out)


features = ['age', 'anaemia', 'creatinine_phosphokinase', 'diabetes',
            'ejection_fraction', 'high_blood_pressure', 'platelets',
            'serum_creatinine', 'serum_sodium', 'sex', 'smoking']

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
valid = pd.read_csv("valid.csv")

xtrain = train[features].astype(np.float32)
ytrain = train['DEATH_EVENT'].astype(np.float32)
xtest = test[features].astype(np.float32)
ytest = test['DEATH_EVENT'].astype(np.float32)

xTrain = torch.from_numpy(xtrain.values)
yTrain = torch.from_numpy(ytrain.values.reshape(-1, 1))  # (-1, 1) instead of a hard-coded row count
xTest = torch.from_numpy(xtest.values)
yTest = torch.from_numpy(ytest.values)

# batch_size is parsed for the Jenkins parameter, but the loop below trains full-batch
batch_size = int(sys.argv[1]) if len(sys.argv) > 1 else 10
num_epochs = int(sys.argv[2]) if len(sys.argv) > 2 else 5
learning_rate = 0.002
input_dim = 11
output_dim = 1

model = LogisticRegressionModel(input_dim, output_dim)
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    # Forward pass
    y_pred = model(xTrain)
    # Compute loss
    loss = criterion(y_pred, yTrain)
    # Backward pass
    loss.backward()
    optimizer.step()

# The model has a single sigmoid output, so threshold at 0.5; argmax over the
# one-column output would always return class 0
predictions = model(xTest)
y_pred_labels = (predictions.detach().numpy() > 0.5).astype(np.float32).reshape(-1)
accuracy_result = accuracy_score(yTest, y_pred_labels)
print("accuracy_score", accuracy_result)
print("F1", f1_score(yTest, y_pred_labels, average=None))

with open("accuracy.txt", "w") as text_file:
    text_file.write(f"accuracy: {accuracy_result}")
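A tiny sketch (illustration only) of why the script thresholds the sigmoid output at 0.5: with a single output column of shape (N, 1), np.argmax(..., axis=1) always returns 0, so it can never predict the positive class.

import numpy as np

probs = np.array([[0.9], [0.2], [0.6]])
print(np.argmax(probs, axis=1))           # [0 0 0] -- argmax over one column is always 0
print((probs > 0.5).astype(int).ravel())  # [1 0 1] -- thresholding recovers the labels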