diff --git a/CreateDataset.dockerfile b/CreateDataset.dockerfile
new file mode 100644
index 0000000..3f303bf
--- /dev/null
+++ b/CreateDataset.dockerfile
@@ -0,0 +1,21 @@
+# Build environment referenced by JenkinsfileCreateDataset (dockerfile agent).
+# The base is pinned: newer Ubuntu releases reject system-wide `pip install` (PEP 668).
+FROM ubuntu:22.04
+
+# COPY rather than ADD: these are plain local files, nothing to extract or fetch.
+COPY get-data.sh /get-data.sh
+COPY prepare_dataset.py /prepare_dataset.py
+
+# `update` and `install` share one layer so a cached, stale package index is
+# never reused; the apt lists are deleted in the same layer to keep it small.
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+        python3 \
+        python3-pip \
+        unzip \
+    && rm -rf /var/lib/apt/lists/*
+
+# Python libraries in a single layer, without keeping pip's download cache.
+RUN python3 -m pip install --no-cache-dir \
+        pandas \
+        scikit-learn
diff --git a/DatasetStats.dockerfile b/DatasetStats.dockerfile
new file mode 100644
index 0000000..08ee655
--- /dev/null
+++ b/DatasetStats.dockerfile
@@ -0,0 +1 @@
+FROM ubuntu:22.04
diff --git a/JenkinsfileCreateDataset b/JenkinsfileCreateDataset
index a6e03f3..fda2d04 100644
--- a/JenkinsfileCreateDataset
+++ b/JenkinsfileCreateDataset
@@ -21,22 +21,37 @@ pipeline {
         )
     }
     stages {
+        stage('Checkout') {
+            steps {
+                sh 'rm -rf ium_z487183'
+                sh 'git clone https://git.wmi.amu.edu.pl/s487183/ium_z487183.git'
+            }
+        }
         stage('Prepare data') {
             steps {
                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh './get-data.sh'
-                    sh 'python3 prepare_dataset.py'
+                    sh 'ium_z487183/get-data.sh'
+                    sh 'python3 ium_z487183/prepare_dataset.py'
                 }
             }
         }
         stage('Archive artifacts') {
+            agent {
+                dockerfile {
+                    filename 'CreateDataset.dockerfile'
+                    dir 'ium_z487183'
+                    reuseNode true
+                }
+            }
             steps {
-                archiveArtifacts 'X_test.csv'
-                archiveArtifacts 'X_val.csv'
-                archiveArtifacts 'X_train.csv'
-                archiveArtifacts 'Y_test.csv'
-                archiveArtifacts 'Y_val.csv'
-                archiveArtifacts 'Y_train.csv'
+                withEnv(["CUTOFF=${params.CUTOFF}"]) {
+                    archiveArtifacts 'X_test.csv'
+                    archiveArtifacts 'X_val.csv'
+                    archiveArtifacts 'X_train.csv'
+                    archiveArtifacts 'Y_test.csv'
+                    archiveArtifacts 'Y_val.csv'
+                    archiveArtifacts 'Y_train.csv'
+                }
             }
         }
     }
 }
diff --git a/JenkinsfileDatasetStats b/JenkinsfileDatasetStats
index b5da6cc..6bffb51 100644
--- a/JenkinsfileDatasetStats
+++ b/JenkinsfileDatasetStats
@@ -8,15 +8,24 @@ pipeline {
         )
     }
     stages {
-        stage('Copy artifacts') {
+        stage('Checkout') {
             steps {
-                copyArtifacts filter: 'X_test.csv,X_val.csv,X_train.csv,Y_test.csv,Y_val.csv,Y_train.csv', fingerprintArtifacts: true, projectName: 'z487183-create-dataset', selector: workspace()
+                sh 'rm -rf ium_z487183'
+                sh 'git clone https://git.wmi.amu.edu.pl/s487183/ium_z487183.git'
             }
         }
         stage('Prepare stats') {
+            agent {
+                dockerfile {
+                    filename 'DatasetStats.dockerfile'
+                    dir 'ium_z487183'
+                    reuseNode true
+                }
+            }
             steps {
-                sh './prepare-stats.sh'
-                archiveArtifacts 'stats.txt'
+                copyArtifacts filter: 'X_test.csv,X_val.csv,X_train.csv,Y_test.csv,Y_val.csv,Y_train.csv', fingerprintArtifacts: true, projectName: 'z487183-create-dataset', selector: workspace()
+                sh './prepare-stats.sh'
+                archiveArtifacts 'stats.txt'
             }
         }
     }
diff --git a/prepare-stats.sh b/prepare-stats.sh
index 6b7e054..14c05f9 100755
--- a/prepare-stats.sh
+++ b/prepare-stats.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+rm -f stats.txt
 touch stats.txt
 wc -l X_test.csv >> stats.txt
 wc -l X_val.csv >> stats.txt