2022-03-27 13:28:05 +02:00
|
|
|
#!/bin/bash
|
|
|
|
# CUTOFF: how many leading lines of heart_2020_cleaned.csv the script works on.
# It is the required first argument and feeds both shell arithmetic and
# `head -n`, so anything but a plain non-negative integer is rejected up front
# instead of failing later with a confusing arithmetic or head error.
CUTOFF=${1:-}
case $CUTOFF in
  '' | *[!0-9]*)
    printf 'usage: %s CUTOFF\n  CUTOFF must be a non-negative integer, got: "%s"\n' \
      "$0" "$CUTOFF" >&2
    exit 2
    ;;
esac
# Force base 10 so a value with leading zeros (e.g. 010) is not read as octal
# by the arithmetic expansions further down.
CUTOFF=$(( 10#$CUTOFF ))
|
|
|
|
|
2022-03-27 17:23:55 +02:00
|
|
|
# Start from a clean slate: delete the heart_2020_* outputs and the dataset
# archive left behind by an earlier run. `-f` makes rm succeed silently when
# nothing is there, and `--` stops a file name from being parsed as an option.
# These commands used to carry a leading `!`, which in bash only inverts the
# exit status, so a successful cleanup was reported as a failure.
rm -rf -- heart_2020_* personal-key-indicators-of-heart-disease.zip
|
|
|
|
|
2022-03-27 13:28:05 +02:00
|
|
|
# One fifth of the CUTOFF lines (integer division) is held out for testing.
TEST_SIZE=$(( $CUTOFF / 5 ))

# The training part begins on the line right after the test slice; this is the
# 1-based line number later handed to `tail -n +N`.
TRAIN_START=$(( $TEST_SIZE + 1 ))

# Print both values, one per line, for the run log.
printf '%s\n' "$TEST_SIZE" "$TRAIN_START"
|
2022-03-27 14:59:01 +02:00
|
|
|
# Log the Kaggle user name taken from the environment (blank line when unset).
printf '%s\n' "${KAGGLE_USERNAME:-}"

# Download the dataset archive into the current directory; --force is passed so
# the archive is fetched even if a local copy exists. Abort on failure: the
# steps that follow read the extracted heart_2020_cleaned.csv, and carrying on
# would split missing or stale data.
if ! kaggle datasets download -d kamilpytlak/personal-key-indicators-of-heart-disease --force; then
  printf 'error: kaggle dataset download failed\n' >&2
  exit 1
fi

# Unpack the archive into the current directory (unzip finds the file by
# appending .zip to the given name).
if ! unzip personal-key-indicators-of-heart-disease -d ./; then
  printf 'error: could not extract personal-key-indicators-of-heart-disease.zip\n' >&2
  exit 1
fi
|
2022-03-26 22:41:13 +01:00
|
|
|
|
2022-03-27 17:23:55 +02:00
|
|
|
#######################################
# Shuffle the leading lines of a CSV and split them into test and train files.
# The header (first line of the source) is kept out of the shuffle and written
# as the first line of every output file. Previously the header was shuffled
# together with the data rows, so it ended up at a random position in one of
# the splits while the other split had no header at all.
# Arguments:
#   $1 - source CSV whose first line is the header
#   $2 - number of leading lines of the source to use (header included)
#   $3 - number of data rows that go into the test split
#   $4 - 1-based position, among the shuffled data rows, of the first
#        training row (i.e. test size + 1)
# Outputs:
#   Writes heart_2020_sorted.csv (header + shuffled rows),
#   heart_2020_test.csv and heart_2020_train.csv in the current directory.
#######################################
split_dataset() {
  local src=$1 cutoff=$2 test_size=$3 train_start=$4
  local header

  # Pass the header through untouched, then emit the remaining rows in random
  # order. `read` consumes exactly one line from the pipe; shuf gets the rest.
  head -n "$cutoff" -- "$src" | {
    IFS= read -r header && printf '%s\n' "$header"
    shuf
  } > heart_2020_sorted.csv

  # Test split: the header plus the first test_size shuffled rows.
  head -n "$(( test_size + 1 ))" heart_2020_sorted.csv > heart_2020_test.csv

  # Train split: the header plus every row from train_start onwards. Because of
  # the header, row k of the data sits on line k + 1 of the shuffled file.
  {
    head -n 1 heart_2020_sorted.csv
    tail -n "+$(( train_start + 1 ))" heart_2020_sorted.csv
  } > heart_2020_train.csv
}

split_dataset heart_2020_cleaned.csv "$CUTOFF" "$TEST_SIZE" "$TRAIN_START"
|
2022-03-26 22:41:13 +01:00
|
|
|
|
|
|
|
|