#!/bin/bash
#
# Download the Kaggle dataset
# "kamilpytlak/personal-key-indicators-of-heart-disease", print a few sanity
# checks about heart_2020_cleaned.csv, and split the first CUTOFF lines of
# that file into shuffled test and train files.
#
# Usage:
#   <this-script> CUTOFF
#
# Arguments:
#   CUTOFF  Non-negative integer: how many leading lines of
#           heart_2020_cleaned.csv to use. CUTOFF / 5 (integer division)
#           lines go to the test file, the remainder to the train file.
#
# Environment:
#   KAGGLE_USERNAME  Printed for diagnostics (presumably also read by the
#                    kaggle CLI for authentication -- confirm against the
#                    CLI documentation).
#
# Files written in the current directory:
#   heart_2020_sorted.csv  first CUTOFF lines, in random order
#   heart_2020_test.csv    first CUTOFF / 5 lines of the shuffled file
#   heart_2020_train.csv   remaining lines of the shuffled file
#   (plus the downloaded archive and whatever it extracts)

set -euo pipefail

readonly DATASET_SLUG='kamilpytlak/personal-key-indicators-of-heart-disease'
readonly ARCHIVE_NAME='personal-key-indicators-of-heart-disease'
readonly DATA_CSV='heart_2020_cleaned.csv'
readonly SHUFFLED_CSV='heart_2020_sorted.csv'
readonly TEST_CSV='heart_2020_test.csv'
readonly TRAIN_CSV='heart_2020_train.csv'

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print usage to stderr and abort with the conventional "bad usage" status.
usage() {
  printf 'Usage: %s CUTOFF\n' "${0##*/}" >&2
  printf '  CUTOFF  non-negative integer number of CSV lines to use\n' >&2
  exit 2
}

# Fail early if an external tool the script depends on is not installed.
require_tool() {
  command -v "$1" >/dev/null 2>&1 || die "required tool not found: $1"
}

main() {
  [[ $# -ge 1 ]] || usage
  [[ "$1" =~ ^[0-9]+$ ]] || die "CUTOFF must be a non-negative integer, got: $1"

  # 10# forces base 10 so that input such as "08" is not rejected as octal.
  local -r cutoff=$((10#$1))
  local -r test_size=$((cutoff / 5))
  local -r train_start=$((test_size + 1))

  printf '%s\n' "$test_size"
  printf '%s\n' "$train_start"
  printf '%s\n' "${KAGGLE_USERNAME:-}"

  require_tool kaggle
  require_tool unzip
  require_tool shuf

  kaggle datasets download -d "$DATASET_SLUG" --force

  # -o: overwrite without prompting, so re-running the script (which always
  # re-downloads because of --force above) does not block on a question.
  unzip -o "$ARCHIVE_NAME" -d ./

  # Basic sanity checks on the extracted CSV.
  wc -l "$DATA_CSV"
  head -n 10 "$DATA_CSV"

  # Value counts for comma-separated columns 10 and 12.
  printf 'Categories: \n\n'
  cut -f 10 -d ',' "$DATA_CSV" | sort | uniq -c
  cut -f 12 -d ',' "$DATA_CSV" | sort | uniq -c

  # Report empty lines with their line numbers. grep exits 1 when nothing
  # matches, which is the healthy case here; any other failure still aborts.
  grep -n '^$' "$DATA_CSV" || [[ $? -eq 1 ]]

  # NOTE(review): the first line of the CSV is shuffled together with the
  # data rows; if it is a header row it will land at a random position in
  # one of the splits. Confirm whether that is intended.
  head -n "$cutoff" "$DATA_CSV" | shuf >"$SHUFFLED_CSV"
  head -n "$test_size" "$SHUFFLED_CSV" >"$TEST_CSV"
  tail -n "+${train_start}" "$SHUFFLED_CSV" >"$TRAIN_CSV"

  wc -l heart_2020_t*
}

main "$@"