This commit is contained in:
Mateusz Piwowarski 2024-03-24 12:55:09 +01:00
parent dc5fef338b
commit 16d7603009
3 changed files with 24 additions and 7 deletions

View File

@@ -10,14 +10,31 @@ unzip -o creditcardfraud.zip
# Remove the zip file # Remove the zip file
rm creditcardfraud.zip rm creditcardfraud.zip
# Shuffle the dataset # Create a header file
shuf creditcard.csv > creditcard_shuf.csv head -n 1 creditcard.csv > creditcard_header.csv
# Remove the header from the dataset
tail -n +2 creditcard.csv > creditcard_no_header.csv
# Remove the original dataset # Remove the original dataset
rm creditcard.csv rm creditcard.csv
# Shuffle the dataset
shuf creditcard_no_header.csv > creditcard_shuf_no_header.csv
# Remove the unshuffled dataset
rm creditcard_no_header.csv
# Add the header back to the shuffled dataset
cat creditcard_header.csv creditcard_shuf_no_header.csv > creditcard_shuf.csv
# Split the dataset into training and testing # Split the dataset into training and testing
tail -n +10001 creditcard_shuf.csv > creditcard_train.csv tail -n +10001 creditcard_shuf_no_header.csv > creditcard_train_no_header.csv
head -n 10000 creditcard_shuf.csv > creditcard_test.csv head -n 10000 creditcard_shuf_no_header.csv > creditcard_test_no_header.csv
# Add the header back to the training and testing datasets
cat creditcard_header.csv creditcard_train_no_header.csv > creditcard_train.csv
cat creditcard_header.csv creditcard_test_no_header.csv > creditcard_test.csv
# Remove the intermediate files
rm creditcard_header.csv creditcard_shuf_no_header.csv creditcard_train_no_header.csv creditcard_test_no_header.csv
# Create a directory for the data # Create a directory for the data
mkdir -p data mkdir -p data

View File

@@ -7,6 +7,6 @@ wc -l < data/creditcard_train.csv > stats_train.txt
wc -l < data/creditcard_test.csv > stats_test.txt wc -l < data/creditcard_test.csv > stats_test.txt
# Create a directory for the statistics # Create a directory for the statistics
mkdir -p stats mkdir -p stats_data
# Move the statistics to the stats directory # Move the statistics to the stats directory
mv stats.txt stats_train.txt stats_test.txt stats/ mv stats.txt stats_train.txt stats_test.txt stats_data/

2
stats/Jenkinsfile vendored
View File

@@ -28,7 +28,7 @@ pipeline {
} }
stage('Archive Artifacts') { stage('Archive Artifacts') {
steps { steps {
archiveArtifacts artifacts: 'stats/*', onlyIfSuccessful: true archiveArtifacts artifacts: 'stats_data/*', onlyIfSuccessful: true
} }
} }
} }