From 16d760300991388e239e89ac96332d32cd6ddbe4 Mon Sep 17 00:00:00 2001
From: Mateusz Piwowarski <matpiw1@st.amu.edu.pl>
Date: Sun, 24 Mar 2024 12:55:09 +0100
Subject: [PATCH] Jenkins: keep CSV header out of shuffle/split; write stats to stats_data/

---
 download_dataset.sh | 25 +++++++++++++++++++++----
 stats.sh            |  6 +++---
 stats/Jenkinsfile   |  2 +-
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/download_dataset.sh b/download_dataset.sh
index 5ea195b..3b2f183 100644
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -10,14 +10,31 @@ unzip -o creditcardfraud.zip
 # Remove the zip file
 rm creditcardfraud.zip
 
-# Shuffle the dataset
-shuf creditcard.csv > creditcard_shuf.csv
+# Save the header row separately so shuf cannot move it into the data rows
+head -n 1 creditcard.csv > creditcard_header.csv
+# Keep only the data rows (everything from line 2 onward)
+tail -n +2 creditcard.csv > creditcard_no_header.csv
 # Remove the original dataset
 rm creditcard.csv
 
+# Shuffle the data rows only
+shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
+# Remove the unshuffled data rows
+rm creditcard_no_header.csv
+
+# Rebuild the full shuffled dataset: header row first, then the shuffled rows
+cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
+
 # Split the dataset into training and testing
-tail -n +10001 creditcard_shuf.csv > creditcard_train.csv
-head -n 10000 creditcard_shuf.csv > creditcard_test.csv
+tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
+head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
+
+# Prepend the header row to both the training and the testing split
+cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
+cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
+
+# Remove the header file and the header-less intermediate files
+rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
 
 # Create a directory for the data
 mkdir -p data
diff --git a/stats.sh b/stats.sh
index 04fbd72..a0a2ce8 100644
--- a/stats.sh
+++ b/stats.sh
@@ -7,6 +7,6 @@ wc -l < data/creditcard_train.csv > stats_train.txt
 wc -l < data/creditcard_test.csv > stats_test.txt
 
 # Create a directory for the statistics
-mkdir -p stats
-# Move the statistics to the stats directory
-mv stats.txt stats_train.txt stats_test.txt stats/
\ No newline at end of file
+mkdir -p stats_data
+# Move the statistics to the stats_data directory
+mv stats.txt stats_train.txt stats_test.txt stats_data/
\ No newline at end of file
diff --git a/stats/Jenkinsfile b/stats/Jenkinsfile
index d788b7b..6610ecd 100644
--- a/stats/Jenkinsfile
+++ b/stats/Jenkinsfile
@@ -28,7 +28,7 @@ pipeline {
     }
     stage('Archive Artifacts') {
       steps {
-        archiveArtifacts artifacts: 'stats/*', onlyIfSuccessful: true
+        archiveArtifacts artifacts: 'stats_data/*', onlyIfSuccessful: true
       }
     }
   }