# Patch summary (text before the first "diff --git" header is ignored by patch tools):
#   * Jenkinsfile_2: new pipeline that copies artifacts from the
#     's444517-create-dataset' job, runs data_stats.sh and archives the results.
#   * data_prep.sh: re-enables the Kaggle download and drops the inline
#     `wc -l` reporting.
#   * data_stats.sh: new script that writes the per-split line counts to stats.txt.
# NOTE(review): the "index" blob hashes are the ones recorded with the original
# commit; they are informational for a plain `git apply`.
diff --git a/Jenkinsfile_2 b/Jenkinsfile_2
new file mode 100644
index 0000000..b706483
--- /dev/null
+++ b/Jenkinsfile_2
@@ -0,0 +1,44 @@
+pipeline {
+    agent any
+    parameters {
+        string(
+            defaultValue: 'kamilab0bkowska',
+            description: 'kaggle username',
+            name: 'KAGGLE_USERNAME',
+            trim: false
+        )
+        password(
+            defaultValue: '',
+            description: 'kaggle password',
+            name: 'KAGGLE_KEY'
+        )
+        buildSelector(
+            defaultSelector: lastSuccessful(),
+            description: 'Which build to use for copying artifacts',
+            name: 'BUILD_SELECTOR'
+        )
+    }
+
+    stages {
+        stage('Stage 1') {
+            steps {
+                echo 'Hello world but for the second time!'
+            }
+        }
+
+        stage('Copy and proceed') {
+            steps {
+                copyArtifacts fingerprintArtifacts: true, projectName: 's444517-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+                sh 'chmod +x -R ./data_stats.sh'
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
+                    "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
+                    sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
+                    sh './data_stats.sh'
+                    archiveArtifacts artifacts: 'apps_train.csv, apps_test.csv, apps_validate.csv, stats.txt'
+                }
+            }
+        }
+
+    }
+}
+
diff --git a/data_prep.sh b/data_prep.sh
index a6c66a4..7cc0ef5 100755
--- a/data_prep.sh
+++ b/data_prep.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-#kaggle datasets download -d lava18/google-play-store-apps
+kaggle datasets download -d lava18/google-play-store-apps
 unzip -o google-play-store-apps.zip
 sed -i '1d' googleplaystore.csv
 shuf googleplaystore.csv > apps_shuf_.csv
@@ -18,7 +18,4 @@ sed -i "$lines"'d' apps_shuf.csv
 head -n $train_val_set apps_shuf.csv > apps_test.csv
 lines="1,$train_val_set"
 sed -i "$lines"'d' apps_shuf.csv
-head -n $train_val_set apps_shuf.csv > apps_validate.csv
-wc -l apps_train.csv
-wc -l apps_test.csv
-wc -l apps_validate.csv
\ No newline at end of file
+head -n $train_val_set apps_shuf.csv > apps_validate.csv
\ No newline at end of file
diff --git a/data_stats.sh b/data_stats.sh
new file mode 100644
index 0000000..1e77ecb
--- /dev/null
+++ b/data_stats.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Record the line count of each dataset split in stats.txt (first write truncates, the rest append).
+wc -l apps_train.csv > stats.txt
+wc -l apps_test.csv >> stats.txt
+wc -l apps_validate.csv >> stats.txt
\ No newline at end of file