ium_464913/download_dataset.sh

25 lines
673 B
Bash
Raw Normal View History

2024-03-24 10:41:05 +01:00
#!/bin/bash
#
# Download the Kaggle "mlg-ulb/creditcardfraud" dataset, shuffle its rows and
# split it into a test set and a training set under ./data.
#
# Outputs (all with the CSV header as their first line):
#   data/creditcard_shuf.csv   - every data row, in shuffled order
#   data/creditcard_test.csv   - the first TEST_ROWS shuffled data rows
#   data/creditcard_train.csv  - the remaining shuffled data rows
#
# Requires: pip, unzip, GNU coreutils (shuf, head, tail).
# NOTE(review): the kaggle CLI normally needs API credentials
# (~/.kaggle/kaggle.json or KAGGLE_USERNAME/KAGGLE_KEY) - confirm they are
# provisioned wherever this script runs.

# Abort on the first failing command, on unset variables and on failures
# inside pipelines, so a failed download never leads to deleting or
# splitting files that do not exist.
set -euo pipefail

readonly DATASET='mlg-ulb/creditcardfraud'
readonly ARCHIVE='creditcardfraud.zip'
readonly RAW_CSV='creditcard.csv'
readonly SHUF_CSV='creditcard_shuf.csv'
readonly TRAIN_CSV='creditcard_train.csv'
readonly TEST_CSV='creditcard_test.csv'
readonly DATA_DIR='data'
# Number of data rows (header excluded) that go into the test set.
readonly TEST_ROWS=10000

# Install the Kaggle API
pip install kaggle

# Download the dataset from Kaggle
kaggle datasets download -d "${DATASET}"

# Unzip the dataset (-o: overwrite leftovers from a previous run)
unzip -o "${ARCHIVE}"

# Remove the zip file
rm -- "${ARCHIVE}"

# Shuffle the dataset. Only the data rows are shuffled; the header row is
# written first so it cannot end up in the middle of the data.
{
  head -n 1 -- "${RAW_CSV}"
  tail -n +2 -- "${RAW_CSV}" | shuf
} > "${SHUF_CSV}"

# Remove the original dataset
rm -- "${RAW_CSV}"

# Split the dataset into training and testing.
# Line 1 of the shuffled file is the header, so the test set is lines
# 1..TEST_ROWS+1 and the training rows start at line TEST_ROWS+2.
head -n "$((TEST_ROWS + 1))" -- "${SHUF_CSV}" > "${TEST_CSV}"
{
  head -n 1 -- "${SHUF_CSV}"
  tail -n "+$((TEST_ROWS + 2))" -- "${SHUF_CSV}"
} > "${TRAIN_CSV}"

# Create a directory for the data
mkdir -p -- "${DATA_DIR}"

# Move the datasets to the data directory
mv -- "${SHUF_CSV}" "${TRAIN_CSV}" "${TEST_CSV}" "${DATA_DIR}/"