# ium_444465/dataset_script.sh
# Bash script, 19 lines, 598 B; last changed 2022-03-26 22:41:13 +01:00.

# Download the Kaggle "personal-key-indicators-of-heart-disease" dataset,
# print a few summary statistics, and split it into shuffled test/train files.
#
# Run with bash; needs the `kaggle` CLI (configured with credentials), unzip,
# and GNU coreutils (shuf, tail -n +K).
#
# Files written to the current directory:
#   heart_2020_sorted.csv - every data row, in random order
#   heart_2020_test.csv   - the first TEST_ROWS shuffled rows
#   heart_2020_train.csv  - the remaining shuffled rows
#
# The commands used to carry a leading `!` (notebook syntax). In bash that is
# pipeline negation: it hid every failure and made a successful run exit 1.
set -euo pipefail

readonly DATASET='kamilpytlak/personal-key-indicators-of-heart-disease'
readonly ARCHIVE='personal-key-indicators-of-heart-disease'
readonly CSV='heart_2020_cleaned.csv'
readonly SHUFFLED='heart_2020_sorted.csv'
readonly TEST_ROWS=4000

kaggle datasets download "$DATASET"
# unzip appends ".zip" itself when the bare name does not exist;
# -o overwrites without prompting so a re-run does not block on input.
unzip -o "$ARCHIVE" -d ./

# Quick look at the raw file: total line count and the first ten lines.
wc -l "$CSV"
head -n 10 "$CSV"

# printf instead of echo: bash's echo prints "\n" literally without -e.
printf 'Categories: \n\n'
# Value frequencies for fields 10 and 12 (comma-separated).
cut -f 10 -d "," "$CSV" | sort | uniq -c
cut -f 12 -d "," "$CSV" | sort | uniq -c

# Report empty lines, if any. grep exits 1 when nothing matches, which is the
# expected clean-data case here; only statuses above 1 are real errors.
empty_status=0
grep -n '^$' "$CSV" || empty_status=$?
if ((empty_status > 1)); then
  printf 'grep failed on %s (status %d)\n' "$CSV" "$empty_status" >&2
  exit "$empty_status"
fi

# Shuffle the data rows only. The previous `head -n -1` dropped the LAST row
# and left the first line in the shuffle.
# NOTE(review): assumes line 1 of the CSV is a column header - confirm.
tail -n +2 "$CSV" | shuf > "$SHUFFLED"

# First TEST_ROWS rows become the test set, everything after is training data.
head -n "$TEST_ROWS" "$SHUFFLED" > heart_2020_test.csv
tail -n "+$((TEST_ROWS + 1))" "$SHUFFLED" > heart_2020_train.csv

# Line counts of the two split files.
wc -l heart_2020_t*