From cc4a90338347d5a17c3985468fa0b6c4d198c6b1 Mon Sep 17 00:00:00 2001 From: Andrzej Preibisz Date: Sun, 27 Mar 2022 17:16:48 +0200 Subject: [PATCH] script fixes --- dataset_script.sh | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/dataset_script.sh b/dataset_script.sh index f3caf97..bdc6d4b 100644 --- a/dataset_script.sh +++ b/dataset_script.sh @@ -6,21 +6,14 @@ TRAIN_START=$(($TEST_SIZE+1)) echo $TEST_SIZE echo $TRAIN_START echo $KAGGLE_USERNAME + kaggle datasets download -d kamilpytlak/personal-key-indicators-of-heart-disease --force ! unzip personal-key-indicators-of-heart-disease -d ./ + ! wc -l heart_2020_cleaned.csv - -! head -n 10 heart_2020_cleaned.csv -echo "Categories: \n" -! cut -f 10 -d "," heart_2020_cleaned.csv | sort | uniq -c -! cut -f 12 -d "," heart_2020_cleaned.csv | sort | uniq -c - - -! grep -P "^$" -n heart_2020_cleaned.csv ! head -n $CUTOFF heart_2020_cleaned.csv | shuf > heart_2020_sorted.csv ! head -n $TEST_SIZE heart_2020_sorted.csv > heart_2020_test.csv ! tail -n +$TRAIN_START heart_2020_sorted.csv > heart_2020_train.csv -! wc -l heart_2020_t*