From 8232d89f0d3a04fc052a3af092dcc36071ca2f89 Mon Sep 17 00:00:00 2001 From: s424714 Date: Sun, 7 May 2023 19:12:15 +0200 Subject: [PATCH] feat: add jenkins training process --- Jenkinsfile-training | 61 ++++++++++++++++++++++++++++++++++++++++++++ src/main.py | 20 ++++++++++----- 2 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 Jenkinsfile-training diff --git a/Jenkinsfile-training b/Jenkinsfile-training new file mode 100644 index 0000000..7542f0b --- /dev/null +++ b/Jenkinsfile-training @@ -0,0 +1,61 @@ +node { + stage('Preparation') { + properties([ + parameters([ + string( + defaultValue: '2', + description: 'Batch size for training process', + name: 'BATCH_SIZE', + trim: false + ), + string( + defaultValue: '3', + description: 'Number of training epochs', + name: 'NUM_EPOCHS', + trim: false + ), + string( + defaultValue: '1e-6', + description: 'Learning rate', + name: 'LR', + trim: false + ), + ]) + ]) + } + + stage('Git clone') { + //cloning git repo + checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's424714', url: 'https://git.wmi.amu.edu.pl/s424714/ium_424714']]]) + } + + stage('Dockerfile build') { + + + sh "chmod +x -R ${env.WORKSPACE}" + + def dockerImage = docker.build("s424714-model") + + dockerImage.inside { + stage("Docker: cloning artifacts"){ + copyArtifacts fingerprintArtifacts: true, projectName: 's424714-create-dataset', selector: buildParameter('BUILD_SELECTOR') + sh 'mkdir -p ./data/dataset' + sh 'mv -t ./data/dataset train.csv test.csv val.csv' + } + stage("Docker: Running training model") { + + + sh 'python ./src/main.py --train --lr=$LR --batch=$BATCH_SIZE --epochs=$NUM_EPOCHS' + sh "cp ./results/model.pt ${WORKSPACE}" + } + + } + + } + + stage('Saving artefacts') { + echo 'Goodbye!' 
+ sh 'ls' + archiveArtifacts artifacts: '*.pt' + } +} \ No newline at end of file diff --git a/src/main.py b/src/main.py index b5f136f..a81190a 100644 --- a/src/main.py +++ b/src/main.py @@ -10,13 +10,6 @@ from evaluate import evaluate # SEED = 2137 -# Hyperparameters - -INITIAL_LR = 1e-6 -NUM_EPOCHS = 2 -BATCH_SIZE = 2 - - # argument parser parser = argparse.ArgumentParser( @@ -27,12 +20,25 @@ parser.add_argument("--train", action="store_true", default=False) parser.add_argument("--test", action="store_true", default=False) parser.add_argument("--model_path", type=str, default="results/model.pt") parser.add_argument("--results_path", type=str, default="results/results.csv") +# HYPER PARAMETERS +parser.add_argument("--batch", "-b", type=int, default=2) +parser.add_argument("--learning_rate", "--lr", type=float, default=1e-6) +parser.add_argument("--num_epochs", "--epochs", "-e", type=int, default=3) if __name__ == "__main__": args = parser.parse_args() + INITIAL_LR = args.learning_rate + NUM_EPOCHS = args.num_epochs + BATCH_SIZE = args.batch + + print("INITIAL_LR: ", INITIAL_LR) + print("NUM_EPOCHS: ", NUM_EPOCHS) + print("BATCH_SIZE: ", BATCH_SIZE) print("CUDA: ", torch.cuda.is_available()) + + # raise # loading & spliting data news_dataset = NewsDataset(data_dir_path="data", data_lenght=1000)