diff --git a/Jenkinsfile b/Jenkinsfile
index 4faa5a8..63f9b49 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -29,16 +29,13 @@ node {
                 "KAGGLE_KEY=${params.KAGGLE_KEY}",
                 "CUTOFF=${params.CUTOFF}"]) {
                 sh "./script.sh ${CUTOFF}"
-                sh "./learning.py"
             }
-
         }
 
         stage('artifacts') {
             echo 'saving artifacts'
-            archiveArtifacts 'output.txt'
-            archiveArtifacts 'model.pt'
+            archiveArtifacts 'output.csv'
         }
     }
 }
 
diff --git a/JenkinsfileTrain b/JenkinsfileTrain
new file mode 100644
index 0000000..b4743eb
--- /dev/null
+++ b/JenkinsfileTrain
@@ -0,0 +1,33 @@
+node {
+    checkout scm
+    def dockerimage = docker.build("titanic-image")
+    dockerimage.inside {
+        stage('Preparation') {
+            properties([
+                parameters([
+                    string(
+                        defaultValue: 'default',
+                        description: 'Hyperparameters passed to learning.py',
+                        name: 'LEARNING_PARAMETERS',
+                        trim: false)
+                ])
+            ])
+            copyArtifacts projectName: 's470618-create-dataset', filter: '*.csv', fingerprintArtifacts: true, selector: lastSuccessful(), target: '.'
+        }
+        stage('Build') {
+            withEnv(["LEARNING_PARAMETERS=${params.LEARNING_PARAMETERS}"]) {
+                sh "./learning.py ${LEARNING_PARAMETERS}"
+            }
+        }
+
+
+        stage('artifacts') {
+            echo 'saving artifacts'
+            archiveArtifacts 'model.pt'
+        }
+        stage('Trigger Learning pipeline') {
+            build 's470618-training'
+        }
+    }
+}
+
diff --git a/learning.py b/learning.py
index e0c2250..6ccaf74 100755
--- a/learning.py
+++ b/learning.py
@@ -4,6 +4,7 @@ import torch
 from torch import nn
 import pandas as pd
 import subprocess
+import sys
 from sklearn.model_selection import train_test_split
 import torch.nn.functional as F
 
@@ -27,7 +28,15 @@ def print_(loss):
     print ("The loss calculated: ", loss)
 
 if __name__ == "__main__":
-    df = pd.read_csv("train.csv")
+
+    # hyperparameter defaults; safe when no CLI arg or 'default' is given
+    alpha = 0.003 #learning rate
+    epochs = 1000
+    if len(sys.argv) > 1 and sys.argv[1] != 'default':
+        pass
+        #TODO split args string to make hyperparameters work
+
+    df = pd.read_csv("output.csv")
     df = df.dropna() #drop NA values
     columns_to_normalize=['Age','Fare'] #NORMALIZATION
 
@@ -52,9 +61,8 @@ if __name__ == "__main__":
     Yt = torch.tensor(Y_train, dtype=torch.long)
 
     model = Model(Xt.shape[1])
-    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
+    optimizer = torch.optim.Adam(model.parameters(), lr=alpha)
     loss_fn = nn.CrossEntropyLoss()
-    epochs = 1000
 
     #TRAINING LOOP
     for epoch in range(1, epochs+1):
diff --git a/script.sh b/script.sh
index 8077f6a..56792f2 100755
--- a/script.sh
+++ b/script.sh
@@ -3,5 +3,5 @@ kaggle competitions download -c titanic
 echo 'kaggle download completed'
 unzip titanic.zip
 wc -l train.csv
-head -$1 train.csv | shuf > output.txt
+head -n "$1" train.csv | shuf > output.csv
 echo 'script done'