#!/bin/bash
#
# Download the Kaggle "mlg-ulb/creditcardfraud" dataset, shuffle its rows, and
# split it into test and training CSV files under ./data.
#
# Outputs (each starts with the original header row):
#   data/creditcard_shuf.csv   - every data row, in shuffled order
#   data/creditcard_test.csv   - the first TEST_ROWS shuffled rows
#   data/creditcard_train.csv  - all remaining shuffled rows
#
# Requires: pip, unzip, and GNU coreutils (shuf, head, tail).
# NOTE(review): the kaggle CLI presumably needs API credentials to be
# configured before the download can succeed - confirm for your environment.

# Abort on the first failing command or unset variable so that a failed
# install/download never leads to empty or partial output files.
set -euo pipefail

# Number of shuffled rows that go into the test set; the rest is training data.
readonly TEST_ROWS=10000

readonly DATASET='mlg-ulb/creditcardfraud'
readonly ARCHIVE='creditcardfraud.zip'
readonly RAW_CSV='creditcard.csv'
readonly OUT_DIR='data'

# Intermediate files, created in the current directory.
readonly HEADER='creditcard_header.csv'
readonly BODY='creditcard_no_header.csv'
readonly SHUF_BODY='creditcard_shuf_no_header.csv'
readonly TRAIN_BODY='creditcard_train_no_header.csv'
readonly TEST_BODY='creditcard_test_no_header.csv'

# Final files, assembled in the current directory and then moved to OUT_DIR.
readonly SHUF_CSV='creditcard_shuf.csv'
readonly TRAIN_CSV='creditcard_train.csv'
readonly TEST_CSV='creditcard_test.csv'

# Remove every intermediate file. Runs on any exit path (success or failure),
# so a failed run does not leave large temporary CSVs behind.
cleanup() {
  rm -f -- "$HEADER" "$BODY" "$SHUF_BODY" "$TRAIN_BODY" "$TEST_BODY"
}
trap cleanup EXIT

# Install the Kaggle API client.
pip install kaggle

# Download the dataset archive from Kaggle into the current directory.
kaggle datasets download -d "$DATASET"

# Unpack (overwriting any stale copy), then drop the archive.
unzip -o "$ARCHIVE"
rm -- "$ARCHIVE"

# Separate the header row from the data rows so only data rows get shuffled.
head -n 1 "$RAW_CSV" > "$HEADER"
tail -n +2 "$RAW_CSV" > "$BODY"
rm -- "$RAW_CSV"

# Shuffle the data rows; the unshuffled copy is no longer needed afterwards.
shuf "$BODY" > "$SHUF_BODY"
rm -- "$BODY"

# Full shuffled dataset with the header restored.
cat "$HEADER" "$SHUF_BODY" > "$SHUF_CSV"

# Split: the first TEST_ROWS rows are the test set, everything from row
# TEST_ROWS + 1 onward is the training set.
head -n "$TEST_ROWS" "$SHUF_BODY" > "$TEST_BODY"
tail -n "+$((TEST_ROWS + 1))" "$SHUF_BODY" > "$TRAIN_BODY"

# Restore the header on both splits.
cat "$HEADER" "$TRAIN_BODY" > "$TRAIN_CSV"
cat "$HEADER" "$TEST_BODY" > "$TEST_CSV"

# Publish the finished files; moving them last means OUT_DIR only ever
# receives completely written CSVs.
mkdir -p -- "$OUT_DIR"
mv -- "$SHUF_CSV" "$TRAIN_CSV" "$TEST_CSV" "$OUT_DIR"/