This commit is contained in:
Mateusz Piwowarski 2024-03-24 11:31:18 +01:00
parent 52d2aa8a79
commit 37cdcc397f
2 changed files with 54 additions and 30 deletions

6
Jenkinsfile vendored
View File

@ -1,5 +1,6 @@
pipeline {
agent any
parameters {
string (
defaultValue: 'vskyper',
@ -29,5 +30,10 @@ pipeline {
}
}
}
// Final pipeline stage: keep the contents of the workspace's data/ directory
// as build artifacts so they can be retrieved after the job finishes.
stage('Archive artifacts') {
steps {
// 'data/*' matches the files directly under data/; onlyIfSuccessful: true
// skips archiving when the build has not succeeded.
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
}
}
}
}

View File

@ -1,7 +1,25 @@
#!/bin/bash
# Download the Kaggle credit-card fraud dataset, shuffle its rows and split it
# into training and testing CSV files stored under ./data.
#
# Outputs (each file starts with the original CSV header line):
#   data/creditcard_shuf.csv   - the full dataset with data rows shuffled
#   data/creditcard_train.csv  - the first TRAIN_ROWS shuffled data rows
#   data/creditcard_test.csv   - the remaining shuffled data rows
#
# Environment:
#   TRAIN_ROWS  - number of data rows in the training split (default: 10000)

# Stop at the first failing command, on unset variables and on failures inside
# pipelines, so a failed install/download never produces partial output files.
set -euo pipefail

# Number of data rows (header excluded) that go into the training split.
TRAIN_ROWS="${TRAIN_ROWS:-10000}"

# Install the Kaggle API
pip install kaggle
# Download the dataset from Kaggle
kaggle datasets download -d mlg-ulb/creditcardfraud
# Unzip the dataset
unzip -o creditcardfraud.zip
# Remove the zip file
rm creditcardfraud.zip

# Shuffle the dataset. The header line is copied first and only the data rows
# (line 2 onwards) are shuffled; shuffling the whole file would move the header
# to a random position among the data rows.
head -n 1 creditcard.csv > creditcard_shuf.csv
tail -n +2 creditcard.csv | shuf >> creditcard_shuf.csv
# Remove the original dataset
rm creditcard.csv

# Split the dataset into training and testing.
# Training: header + the first TRAIN_ROWS data rows.
head -n "$((TRAIN_ROWS + 1))" creditcard_shuf.csv > creditcard_train.csv
# Testing: header + every data row after the training rows.
{
    head -n 1 creditcard_shuf.csv
    tail -n "+$((TRAIN_ROWS + 2))" creditcard_shuf.csv
} > creditcard_test.csv

# Create a directory for the data
mkdir -p data
# Move the datasets to the data directory
mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/