Mateusz Piwowarski 2024-03-24 11:31:18 +01:00
parent 52d2aa8a79
commit 37cdcc397f
2 changed files with 54 additions and 30 deletions

Jenkinsfile

@@ -1,33 +1,39 @@
pipeline {
    agent any
    parameters {
        string(
            defaultValue: 'vskyper',
            description: 'Kaggle username',
            name: 'KAGGLE_USERNAME',
            trim: false
        )
        password(
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            name: 'KAGGLE_KEY'
        )
    }
    stages {
        stage('Clone Repository') {
            steps {
                git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
            }
        }
        stage('Download dataset') {
            steps {
                script {
                    withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
                        sh 'chmod +x download_dataset.sh'
                        sh './download_dataset.sh'
                    }
                }
            }
        }
        stage('Archive artifacts') {
            steps {
                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
            }
        }
    }
}
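
The two parameters are surfaced to the build as the KAGGLE_USERNAME and KAGGLE_KEY environment variables, which the Kaggle CLI accepts in place of a ~/.kaggle/kaggle.json file (see the kaggle-api link in the parameter description). A minimal sketch for reproducing the 'Download dataset' stage outside Jenkins, assuming the kaggle CLI is available and the placeholder below is replaced with the "key" field of your kaggle.json:

# Run the download step locally with the same credentials the pipeline injects.
export KAGGLE_USERNAME=vskyper
export KAGGLE_KEY=<key field from kaggle.json>   # placeholder, not a real token
chmod +x download_dataset.sh
./download_dataset.sh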

download_dataset.sh

@@ -1,7 +1,25 @@
#!/bin/bash
# Install the Kaggle API
pip install kaggle
# Download the dataset from Kaggle
kaggle datasets download -d mlg-ulb/creditcardfraud
# Unzip the dataset
unzip -o creditcardfraud.zip
# Remove the zip file
rm creditcardfraud.zip
# Shuffle the dataset
shuf creditcard.csv > creditcard_shuf.csv
# Remove the original dataset
rm creditcard.csv
# Split the dataset into training and testing
head -n 10000 creditcard_shuf.csv > creditcard_train.csv
tail -n +10001 creditcard_shuf.csv > creditcard_test.csv
# Create a directory for the data
mkdir -p data
# Move the datasets to the data directory
mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
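
One caveat worth noting: creditcard.csv starts with a header row, so a plain shuf mixes that header in with the data rows before the head/tail split. A hedged sketch of a header-preserving variant of the shuffle and split steps (the helper files header.csv and rows_shuf.csv are illustrative names, not part of the committed script; the mkdir/mv steps would stay the same), keeping the same 10000-line train file:

# Sketch: shuffle only the data rows, then give each split its own header.
head -n 1 creditcard.csv > header.csv                    # header row only
tail -n +2 creditcard.csv | shuf > rows_shuf.csv         # shuffled data rows
cat header.csv rows_shuf.csv > creditcard_shuf.csv
{ cat header.csv; head -n 9999 rows_shuf.csv; } > creditcard_train.csv
{ cat header.csv; tail -n +10000 rows_shuf.csv; } > creditcard_test.csv
rm creditcard.csv header.csv rows_shuf.csv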