script fixes

This commit is contained in:
Andrzej Preibisz 2022-03-27 17:16:48 +02:00
parent fe4874a2a1
commit cc4a903383

View File

@@ -6,21 +6,14 @@ TRAIN_START=$(($TEST_SIZE+1))
# Download the Kaggle heart-disease dataset, print a few sanity checks, then
# shuffle the first $CUTOFF lines and split them into test and train files.
# Reads:  TEST_SIZE, TRAIN_START, CUTOFF, KAGGLE_USERNAME (set outside this section).
# Writes: heart_2020_sorted.csv, heart_2020_test.csv, heart_2020_train.csv.
#
# The notebook-style "! " prefixes were dropped: in bash, "!" negates a
# pipeline's exit status, so every successful command reported failure, real
# failures were hidden from errexit, and a trailing "! wc" made the script
# exit 1 on success.

# Echo the split parameters and the Kaggle account in use, one value per line.
printf '%s\n' "$TEST_SIZE" "$TRAIN_START" "$KAGGLE_USERNAME"

kaggle datasets download -d kamilpytlak/personal-key-indicators-of-heart-disease --force
# -o: overwrite files left by a previous run instead of prompting, which
# would stall a non-interactive run.
unzip -o personal-key-indicators-of-heart-disease -d ./

# Quick look at the raw data: line count and the first ten lines.
wc -l heart_2020_cleaned.csv
head -n 10 heart_2020_cleaned.csv

# printf instead of echo: bash's builtin echo prints "\n" literally unless -e is given.
printf 'Categories: \n\n'
# Value counts for comma-separated fields 10 and 12.
cut -f 10 -d "," heart_2020_cleaned.csv | sort | uniq -c
cut -f 12 -d "," heart_2020_cleaned.csv | sort | uniq -c

# List empty lines with their line numbers. grep exits 1 when nothing matches,
# which is the expected outcome here; only a real grep error (status > 1) is
# allowed to propagate.
grep -n '^$' heart_2020_cleaned.csv || [ "$?" -eq 1 ]

# Shuffle the first $CUTOFF lines, then split: the first $TEST_SIZE lines become
# the test set, everything from line $TRAIN_START onward becomes the train set.
# NOTE(review): head counts from line 1, so if the CSV has a header row it is
# shuffled in with the data rows - confirm whether that is intended.
head -n "$CUTOFF" heart_2020_cleaned.csv | shuf > heart_2020_sorted.csv
head -n "$TEST_SIZE" heart_2020_sorted.csv > heart_2020_test.csv
tail -n "+$TRAIN_START" heart_2020_sorted.csv > heart_2020_train.csv
wc -l heart_2020_t*