diff --git a/Jenkinsfile b/Jenkinsfile index 743351f..8107c6b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -1,33 +1,39 @@ pipeline { - agent any - parameters { - string ( - defaultValue: 'vskyper', - description: 'Kaggle username', - name: 'KAGGLE_USERNAME', - trim: false - ) - password ( - defaultValue: '', - description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials', - name: 'KAGGLE_KEY', - ) - } - stages { - stage('Clone Repository') { - steps { - git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git' - } - } - stage('Download dataset') { - steps { - script { - withEnv (["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { - sh 'chmod +x download_dataset.sh' - sh './download_dataset.sh' - } - } + agent any + + parameters { + string ( + defaultValue: 'vskyper', + description: 'Kaggle username', + name: 'KAGGLE_USERNAME', + trim: false + ) + password ( + defaultValue: '', + description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials', + name: 'KAGGLE_KEY', + ) + } + stages { + stage('Clone Repository') { + steps { + git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git' + } + } + stage('Download dataset') { + steps { + script { + withEnv (["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { + sh 'chmod +x download_dataset.sh' + sh './download_dataset.sh' } } + } } + stage('Archive artifacts') { + steps { + archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true + } + } + } } \ No newline at end of file diff --git a/download_dataset.sh b/download_dataset.sh index 27ed624..3463cb9 100644 --- a/download_dataset.sh +++ b/download_dataset.sh @@ -1,7 +1,25 @@ #!/bin/bash +# Install the Kaggle API pip install kaggle - +# Download the dataset from Kaggle kaggle datasets download -d mlg-ulb/creditcardfraud -unzip -o creditcardfraud.zip \ No newline at end of file +# Unzip the dataset +unzip -o creditcardfraud.zip +# Remove the zip file +rm creditcardfraud.zip + +# Shuffle the dataset +shuf creditcard.csv > creditcard_shuf.csv +# Remove the original dataset +rm creditcard.csv + +# Split the dataset into training and testing +head -n 10000 creditcard_shuf.csv > creditcard_train.csv +tail -n +10001 creditcard_shuf.csv > creditcard_test.csv + +# Create a directory for the data +mkdir -p data +# Move the datasets to the data directory +mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/ \ No newline at end of file