Change directory structure

This commit is contained in:
s487179 2023-04-14 18:51:14 +02:00
parent 67261c7309
commit fa7010330e
3 changed files with 48 additions and 0 deletions

40
createDataset/Jenkinsfile vendored Normal file
View File

@ -0,0 +1,40 @@
// Declarative pipeline: runs datasetScript.sh with the Kaggle parameters in
// its environment, then archives the CSV file the script leaves behind.
pipeline {
    agent any

    // Values entered by the user when the build is started.
    parameters {
        string(
            name: 'KAGGLE_USERNAME',
            description: 'Kaggle username',
            defaultValue: 'wojciechbatruszewicz',
            trim: false
        )
        // Declared as a password parameter rather than a plain string.
        password(
            name: 'KAGGLE_KEY',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            defaultValue: ''
        )
        string(
            name: 'CUTOFF',
            description: 'dataset cutoff',
            defaultValue: '50',
            trim: false
        )
    }

    stages {
        stage('Run sh file') {
            steps {
                checkout scm
                // Workspace listing for the build log.
                sh 'ls -l'
                // Make the two Kaggle parameters available as environment
                // variables to the steps inside this block.
                withEnv([
                    "KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
                    "KAGGLE_KEY=${params.KAGGLE_KEY}"
                ]) {
                    // Mark the script executable, then run it.
                    sh 'chmod +x ./datasetScript.sh'
                    sh './datasetScript.sh'
                }
            }
        }

        stage('Archive file') {
            steps {
                // Store the resulting CSV with the build and fingerprint it.
                archiveArtifacts artifacts: 'loan_sanction_shuffled.csv', fingerprint: true
            }
        }
    }
}

View File

@ -0,0 +1,8 @@
#!/bin/bash
# Download the Kaggle "home-loan-approval" dataset, merge its two CSV files
# into loan_sanction.csv and write a shuffled sample of CUTOFF data rows
# (preceded by the header row) to loan_sanction_shuffled.csv.
#
# Environment:
#   KAGGLE_USERNAME  echoed to the log below.
#                    NOTE(review): presumably also read by the kaggle CLI
#                    (together with KAGGLE_KEY) for authentication - confirm.
#   CUTOFF           number of data rows to keep; must be a non-negative integer.

# Stop at the first failing command (e.g. a failed download) instead of
# carrying on and producing an empty or stale output file.
set -euo pipefail

echo "KAGGLE_USERNAME: ${KAGGLE_USERNAME:-}"

# Reject a missing or non-numeric CUTOFF up front rather than silently
# sampling an unintended number of rows.
if ! [[ "${CUTOFF:-}" =~ ^[0-9]+$ ]]; then
    echo "CUTOFF must be a non-negative integer, got: '${CUTOFF:-}'" >&2
    exit 1
fi

kaggle datasets download -d rishikeshkonapure/home-loan-approval
unzip -o home-loan-approval.zip

# Merge both files but keep only the first file's header; a plain `cat` of
# the two would leave the second header in the middle as a bogus data row.
# NOTE(review): assumes both CSV files share the same columns - confirm.
{
    cat loan_sanction_test.csv
    tail -n +2 loan_sanction_train.csv
} > loan_sanction.csv
head -n 5 loan_sanction.csv

# Header first, then exactly CUTOFF randomly chosen data rows. `shuf -n` is
# used instead of `shuf | head` so no stage exits early with SIGPIPE, which
# would fail the script under `pipefail`.
{
    head -n 1 loan_sanction.csv
    tail -n +2 loan_sanction.csv | shuf -n "$CUTOFF"
} > loan_sanction_shuffled.csv
head -n 5 loan_sanction_shuffled.csv

Binary file not shown.