From 16d760300991388e239e89ac96332d32cd6ddbe4 Mon Sep 17 00:00:00 2001
From: Mateusz Piwowarski
Date: Sun, 24 Mar 2024 12:55:09 +0100
Subject: [PATCH] Jenkins

---
Note (ignored by git am): this patch was restored from a copy whose line
breaks and leading whitespace had been collapsed. Blank context lines were
placed so that every hunk matches its @@ line counts and the diffstat. The
indentation inside the stats/Jenkinsfile hunk is a best guess (4 spaces per
level); if the context fails to match, apply with
`git apply --ignore-whitespace`.

 download_dataset.sh | 25 +++++++++++++++++++++----
 stats.sh            |  4 ++--
 stats/Jenkinsfile   |  2 +-
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/download_dataset.sh b/download_dataset.sh
index 5ea195b..3b2f183 100644
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -10,14 +10,31 @@ unzip -o creditcardfraud.zip
 # Remove the zip file
 rm creditcardfraud.zip
 
-# Shuffle the dataset
-shuf creditcard.csv > creditcard_shuf.csv
+# Create a header file
+head -n 1 creditcard.csv > creditcard_header.csv
+# Remove the header from the dataset
+tail -n +2 creditcard.csv > creditcard_no_header.csv
 # Remove the original dataset
 rm creditcard.csv
 
+# Shuffle the dataset
+shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
+# Remove the unshuffled dataset
+rm creditcard_no_header.csv
+
+# Add the header back to the shuffled dataset
+cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
+
 # Split the dataset into training and testing
-tail -n +10001 creditcard_shuf.csv > creditcard_train.csv
-head -n 10000 creditcard_shuf.csv > creditcard_test.csv
+tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
+head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
+
+# Add the header back to the training and testing datasets
+cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
+cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
+
+# Remove the intermediate files
+rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
 
 # Create a directory for the data
 mkdir -p data
diff --git a/stats.sh b/stats.sh
index 04fbd72..a0a2ce8 100644
--- a/stats.sh
+++ b/stats.sh
@@ -7,6 +7,6 @@ wc -l < data/creditcard_train.csv > stats_train.txt
 wc -l < data/creditcard_test.csv > stats_test.txt
 
 # Create a directory for the statistics
-mkdir -p stats
+mkdir -p stats_data
 # Move the statistics to the stats directory
-mv stats.txt stats_train.txt stats_test.txt stats/
\ No newline at end of file
+mv stats.txt stats_train.txt stats_test.txt stats_data/
\ No newline at end of file
diff --git a/stats/Jenkinsfile b/stats/Jenkinsfile
index d788b7b..6610ecd 100644
--- a/stats/Jenkinsfile
+++ b/stats/Jenkinsfile
@@ -28,7 +28,7 @@ pipeline {
         }
         stage('Archive Artifacts') {
             steps {
-                archiveArtifacts artifacts: 'stats/*', onlyIfSuccessful: true
+                archiveArtifacts artifacts: 'stats_data/*', onlyIfSuccessful: true
             }
         }
     }