27 lines
734 B
Bash
27 lines
734 B
Bash
#!/bin/bash
#
# Download the Kaggle dataset
# kamilpytlak/personal-key-indicators-of-heart-disease, print a few
# exploratory summaries of heart_2020_cleaned.csv, then shuffle the first
# CUTOFF lines of that file and split them into a test set (the first fifth
# of the shuffled lines) and a training set (the remaining lines).
#
# Usage:
#   <script> CUTOFF
#     CUTOFF  number of leading lines of heart_2020_cleaned.csv to sample
#             (non-negative integer)
#
# Requires on PATH: kaggle, unzip, shuf.
#
# Files written to the current directory:
#   heart_2020_sorted.csv  the sampled lines in shuffled order
#                          (despite the name, this file is NOT sorted)
#   heart_2020_test.csv    first CUTOFF/5 lines of the shuffled sample
#   heart_2020_train.csv   the rest of the shuffled sample

set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

(( $# >= 1 )) || die "Usage: ${0##*/} CUTOFF"
[[ "$1" =~ ^[0-9]+$ ]] || die "CUTOFF must be a non-negative integer, got: $1"

# Force base 10 so that a value with leading zeros (e.g. 0100) is not
# interpreted as octal by shell arithmetic.
CUTOFF=$(( 10#$1 ))

# One fifth of the sample (integer division) goes to the test set; the
# training set starts on the line right after it.
TEST_SIZE=$(( CUTOFF / 5 ))
TRAIN_START=$(( TEST_SIZE + 1 ))

readonly CUTOFF TEST_SIZE TRAIN_START
readonly DATASET='kamilpytlak/personal-key-indicators-of-heart-disease'
readonly ARCHIVE='personal-key-indicators-of-heart-disease'
readonly DATA_FILE='heart_2020_cleaned.csv'
readonly SHUFFLED_FILE='heart_2020_sorted.csv'
readonly TEST_FILE='heart_2020_test.csv'
readonly TRAIN_FILE='heart_2020_train.csv'

printf '%s\n' "$TEST_SIZE"
printf '%s\n' "$TRAIN_START"

# --- Fetch and unpack -------------------------------------------------------
kaggle datasets download "$DATASET"
# unzip retries with a ".zip" suffix when the bare name does not exist.
unzip "$ARCHIVE" -d ./

# --- Quick look at the data -------------------------------------------------
wc -l "$DATA_FILE"
head -n 10 "$DATA_FILE"

# printf is used because bash's echo prints "\n" literally without -e.
printf 'Categories: \n\n'

# Value counts for comma-separated fields 10 and 12.
cut -f 10 -d ',' "$DATA_FILE" | sort | uniq -c
cut -f 12 -d ',' "$DATA_FILE" | sort | uniq -c

# Report empty lines, if any. grep exits 1 when nothing matches, which is the
# expected outcome here; only a status above 1 is a real error.
grep_status=0
grep -n '^$' "$DATA_FILE" || grep_status=$?
(( grep_status <= 1 )) || die "grep failed on $DATA_FILE (status $grep_status)"

# --- Sample, shuffle, split -------------------------------------------------
# NOTE(review): the first CUTOFF lines include line 1 of the CSV, which is
# presumably the header row; it is shuffled in with the data and ends up at a
# random position in one of the two output files. Left unchanged so the
# output format stays as before - confirm whether consumers expect this.
head -n "$CUTOFF" "$DATA_FILE" | shuf > "$SHUFFLED_FILE"

head -n "$TEST_SIZE" "$SHUFFLED_FILE" > "$TEST_FILE"
tail -n "+${TRAIN_START}" "$SHUFFLED_FILE" > "$TRAIN_FILE"

wc -l "$TEST_FILE" "$TRAIN_FILE"