ium_444465/dataset_script.sh

20 lines
524 B
Bash
Raw Normal View History

2022-03-27 13:28:05 +02:00
#!/bin/bash
# Derive the train/test split sizes from the row cutoff passed as $1.
#
# Arguments: $1 - CUTOFF: how many leading lines of the dataset file are used
#                 (non-negative decimal integer).
# Outputs:   TEST_SIZE, then TRAIN_START, one per line on stdout.
# Exits:     2, with a usage message on stderr, when CUTOFF is missing or is
#            not a non-negative integer.
CUTOFF=${1:-}
if ! [[ "$CUTOFF" =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s CUTOFF   (CUTOFF must be a non-negative integer)\n' \
    "${0##*/}" >&2
  exit 2
fi
# Force base 10: a leading zero (e.g. 010) would otherwise be read as octal by
# $(( )) while head/tail read the same text as decimal.
CUTOFF=$((10#$CUTOFF))
# The test split is one fifth of the selected lines (integer division).
TEST_SIZE=$((CUTOFF / 5))
# 1-based line number at which the training split starts (used with tail -n +N).
TRAIN_START=$((TEST_SIZE + 1))
echo "$TEST_SIZE"
echo "$TRAIN_START"
2022-03-27 14:59:01 +02:00
# Print the KAGGLE_USERNAME environment variable (an empty line when unset).
# presumably this is the account the kaggle CLI authenticates as -- confirm.
# printf with a quoted expansion prints the value verbatim: no word splitting,
# no globbing, and no misreading of a value such as "-n" as an echo option.
printf '%s\n' "${KAGGLE_USERNAME:-}"
2022-03-27 17:16:48 +02:00
2022-03-27 14:59:01 +02:00
# Fetch the dataset archive with the kaggle CLI; --force requests a fresh
# download even when a local copy exists. Stop here on failure so the later
# steps never run against a missing or stale archive.
if ! kaggle datasets download -d kamilpytlak/personal-key-indicators-of-heart-disease --force; then
  printf 'error: kaggle dataset download failed\n' >&2
  exit 1
fi
2022-03-26 22:41:13 +01:00
# Extract the downloaded archive into the current directory. The name is given
# without ".zip"; unzip appends the suffix itself when the bare name is not
# found. -o overwrites files left by a previous run instead of prompting, so a
# re-run cannot stall on an interactive question. Abort if extraction fails.
if ! unzip -o personal-key-indicators-of-heart-disease -d ./; then
  printf 'error: could not extract the dataset archive\n' >&2
  exit 1
fi
2022-03-27 17:16:48 +02:00
# Informational: print the line count of the extracted CSV. The exit status is
# left as wc reports it (non-zero when the file cannot be read).
wc -l heart_2020_cleaned.csv
2022-03-27 13:28:05 +02:00
# Take the first CUTOFF lines of the CSV and write them in random order to
# heart_2020_sorted.csv (despite the name, the output is shuffled, not sorted).
# NOTE(review): if the CSV starts with a header row, that row is shuffled in
# with the data lines -- confirm whether that is intended.
head -n "$CUTOFF" heart_2020_cleaned.csv | shuf > heart_2020_sorted.csv
# Check both pipeline stages: shuf alone succeeds on empty input, which would
# hide a failing head (missing file, empty or invalid CUTOFF).
subset_status=("${PIPESTATUS[@]}")
if ((subset_status[0] != 0 || subset_status[1] != 0)); then
  printf 'error: could not build heart_2020_sorted.csv\n' >&2
  exit 1
fi
2022-03-26 22:41:13 +01:00
2022-03-27 13:28:05 +02:00
# Split the shuffled file: the first TEST_SIZE lines become the test set and
# everything from line TRAIN_START onward becomes the training set.
# Each step is checked so a failure stops the script with a non-zero status,
# and a successful run ends with status 0.
if ! head -n "$TEST_SIZE" heart_2020_sorted.csv > heart_2020_test.csv; then
  printf 'error: could not write heart_2020_test.csv\n' >&2
  exit 1
fi
if ! tail -n "+$TRAIN_START" heart_2020_sorted.csv > heart_2020_train.csv; then
  printf 'error: could not write heart_2020_train.csv\n' >&2
  exit 1
fi
2022-03-26 22:41:13 +01:00