diff --git a/dataset_script.sh b/dataset_script.sh
index f3caf97..bdc6d4b 100644
--- a/dataset_script.sh
+++ b/dataset_script.sh
@@ -6,21 +6,14 @@ TRAIN_START=$(($TEST_SIZE+1))
 echo $TEST_SIZE
 echo $TRAIN_START
 echo $KAGGLE_USERNAME
+
 kaggle datasets download -d kamilpytlak/personal-key-indicators-of-heart-disease --force
 ! unzip personal-key-indicators-of-heart-disease -d ./
+
 ! wc -l heart_2020_cleaned.csv
-
-! head -n 10 heart_2020_cleaned.csv
-echo "Categories: \n"
-! cut -f 10 -d "," heart_2020_cleaned.csv | sort | uniq -c
-! cut -f 12 -d "," heart_2020_cleaned.csv | sort | uniq -c
-
-
-! grep -P "^$" -n heart_2020_cleaned.csv
 ! head -n $CUTOFF heart_2020_cleaned.csv | shuf > heart_2020_sorted.csv
 ! head -n $TEST_SIZE heart_2020_sorted.csv > heart_2020_test.csv
 ! tail -n +$TRAIN_START heart_2020_sorted.csv > heart_2020_train.csv
-! wc -l heart_2020_t*