diff --git a/createDataset/Jenkinsfile b/createDataset/Jenkinsfile
new file mode 100644
index 0000000..7b16cf6
--- /dev/null
+++ b/createDataset/Jenkinsfile
@@ -0,0 +1,45 @@
+// Builds a shuffled, size-limited sample of the Kaggle home-loan dataset
+// by running datasetScript.sh, then archives the resulting CSV.
+pipeline {
+    agent any
+    parameters {
+        string(
+            defaultValue: 'wojciechbatruszewicz',
+            description: 'Kaggle username',
+            name: 'KAGGLE_USERNAME',
+            trim: false
+        )
+        password(
+            defaultValue: '',
+            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
+            name: 'KAGGLE_KEY'
+        )
+        string(
+            defaultValue: '50',
+            description: 'dataset cutoff',
+            name: 'CUTOFF',
+            // Trimmed because the script validates this as a plain integer.
+            trim: true
+        )
+    }
+    stages {
+        stage('Run sh file') {
+            steps {
+                checkout scm
+                sh 'ls -l'
+                // Hand every value the script reads to it explicitly.
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
+                         "KAGGLE_KEY=${params.KAGGLE_KEY}",
+                         "CUTOFF=${params.CUTOFF}"]) {
+                    sh 'chmod +x ./datasetScript.sh'
+                    sh './datasetScript.sh'
+                }
+            }
+        }
+        stage('Archive file') {
+            steps {
+                archiveArtifacts artifacts: 'loan_sanction_shuffled.csv', fingerprint: true
+            }
+        }
+    }
+}
diff --git a/createDataset/datasetScript.sh b/createDataset/datasetScript.sh
new file mode 100644
index 0000000..3a2d7b9
--- /dev/null
+++ b/createDataset/datasetScript.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Downloads the Kaggle home-loan-approval dataset, merges its two CSV files
+# and writes a shuffled sample of CUTOFF data rows (plus the header line) to
+# loan_sanction_shuffled.csv.
+#
+# Environment:
+#   KAGGLE_USERNAME, KAGGLE_KEY  Kaggle credentials supplied by the Jenkins job
+#   CUTOFF                       number of data rows to keep (default: 50)
+
+# Abort on the first failing command, unset variable or failing pipeline stage
+# so that a failed download cannot silently yield a stale or empty artifact.
+set -euo pipefail
+
+# Accept a plain positive decimal number only; nothing else reaches shuf.
+cutoff="${CUTOFF:-50}"
+if ! [[ "$cutoff" =~ ^[1-9][0-9]*$ ]]; then
+    echo "CUTOFF must be a positive integer, got: '${cutoff}'" >&2
+    exit 1
+fi
+
+echo "KAGGLE_USERNAME: ${KAGGLE_USERNAME:-}"
+kaggle datasets download -d rishikeshkonapure/home-loan-approval
+unzip -o home-loan-approval.zip
+
+# Keep only the first file's header; a second header line would otherwise be
+# shuffled into the data rows.
+# NOTE(review): assumes both files share the same column layout - confirm.
+{
+    cat loan_sanction_test.csv
+    tail -n +2 loan_sanction_train.csv
+} > loan_sanction.csv
+head -n 5 loan_sanction.csv
+
+# Header first, then at most $cutoff randomly chosen data rows. shuf -n does
+# the limiting itself, so no downstream head closes the pipe early.
+{
+    head -n 1 loan_sanction.csv
+    tail -n +2 loan_sanction.csv | shuf -n "$cutoff"
+} > loan_sanction_shuffled.csv
+head -n 5 loan_sanction_shuffled.csv
diff --git a/createDataset/home-loan-approval.zip b/createDataset/home-loan-approval.zip
new file mode 100644
index 0000000..861c320
Binary files /dev/null and b/createDataset/home-loan-approval.zip differ