commit 37cdcc397f
parent 52d2aa8a79
Author: Mateusz Piwowarski
Date:   2024-03-24 11:31:18 +01:00

2 changed files with 54 additions and 30 deletions

Jenkinsfile

@@ -1,33 +1,39 @@
pipeline {
    agent any
    parameters {
        string (
            defaultValue: 'vskyper',
            description: 'Kaggle username',
            name: 'KAGGLE_USERNAME',
            trim: false
        )
        password (
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            name: 'KAGGLE_KEY',
        )
    }
    stages {
        stage('Clone Repository') {
            steps {
                git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
            }
        }
        stage('Download dataset') {
            steps {
                script {
                    withEnv (["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
                        sh 'chmod +x download_dataset.sh'
                        sh './download_dataset.sh'
                    }
                }
            }
        }
        stage('Archive artifacts') {
            steps {
                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
            }
        }
    }
}
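For context, the withEnv block works because the Kaggle CLI can read its credentials from the KAGGLE_USERNAME and KAGGLE_KEY environment variables instead of a kaggle.json file, as described in the kaggle-api README linked above. A minimal sketch of the same idea outside Jenkins, assuming the kaggle package is installed and the placeholder values are replaced with real credentials:

# Hypothetical local run; the values below are placeholders, not real credentials.
export KAGGLE_USERNAME='vskyper'
export KAGGLE_KEY='<token from kaggle.json>'
chmod +x download_dataset.sh
./download_dataset.sh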

download_dataset.sh

@@ -1,7 +1,25 @@
#!/bin/bash
# Install the Kaggle API
pip install kaggle
# Download the dataset from Kaggle
kaggle datasets download -d mlg-ulb/creditcardfraud
# Unzip the dataset
unzip -o creditcardfraud.zip
# Remove the zip file
rm creditcardfraud.zip
# Shuffle the dataset
shuf creditcard.csv > creditcard_shuf.csv
# Remove the original dataset
rm creditcard.csv
# Split the dataset into training and testing
head -n 10000 creditcard_shuf.csv > creditcard_train.csv
tail -n +10001 creditcard_shuf.csv > creditcard_test.csv
# Create a directory for the data
mkdir -p data
# Move the datasets to the data directory
mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
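As a quick sanity check (not part of the commit), the split sizes can be verified after the script has run, assuming it completed successfully and the data/ directory was created:

# Hypothetical check, run after the Download dataset stage has finished.
wc -l data/creditcard_train.csv data/creditcard_test.csv
# creditcard_train.csv should report exactly 10000 lines (head -n 10000);
# creditcard_test.csv holds the remaining lines of the shuffled file (tail -n +10001).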