ium_444465/dataset_script.sh
Andrzej Preibisz c1b7825305 script fixes
2022-03-27 17:58:55 +02:00

22 lines
562 B
Bash

#!/bin/bash
#
# Download the Kaggle dataset kamilpytlak/personal-key-indicators-of-heart-disease
# and split the first CUTOFF lines of heart_2020_cleaned.csv into a shuffled
# test set (one fifth, integer division) and a train set (the remainder).
#
# Usage: dataset_script.sh CUTOFF
#   CUTOFF  positive integer: how many leading lines of heart_2020_cleaned.csv
#           to take (line 1 is counted, and is treated as the header).
#
# Requires: kaggle CLI, unzip, shuf.
#
# Outputs, written to the current directory:
#   heart_2020_sorted.csv  header line + shuffled rows
#   heart_2020_test.csv    header line + first TEST_SIZE shuffled rows
#   heart_2020_train.csv   header line + shuffled rows from TRAIN_START onward
# Prints TEST_SIZE, TRAIN_START and $KAGGLE_USERNAME to stdout, one per line.

# Abort on the first failing command (including inside pipelines) so a failed
# download or unzip never leads to silently empty/partial split files.
set -euo pipefail

readonly ARCHIVE='personal-key-indicators-of-heart-disease.zip'
readonly SOURCE_CSV='heart_2020_cleaned.csv'

# Reject a missing or non-numeric CUTOFF up front; otherwise the arithmetic
# below is a syntax error and head/tail receive empty arguments.
readonly raw_cutoff=${1:-}
if [[ ! $raw_cutoff =~ ^[0-9]+$ ]] || ((10#$raw_cutoff < 1)); then
  printf 'Usage: %s CUTOFF  (CUTOFF must be a positive integer)\n' "${0##*/}" >&2
  exit 2
fi

# 10# forces base 10 so a value such as "0500" is not parsed as octal.
CUTOFF=$((10#$raw_cutoff))
TEST_SIZE=$((CUTOFF / 5))
TRAIN_START=$((TEST_SIZE + 1))

# Remove artifacts of previous runs. The glob is intentionally unquoted; when
# nothing matches, rm -f ignores the literal pattern.
rm -rf -- heart_2020_*
rm -rf -- "$ARCHIVE"

printf '%s\n' "$TEST_SIZE"
printf '%s\n' "$TRAIN_START"
# Empty default keeps this line working under `set -u` when the variable is unset.
printf '%s\n' "${KAGGLE_USERNAME:-}"

kaggle datasets download -d kamilpytlak/personal-key-indicators-of-heart-disease --force
# -o: overwrite without prompting, so a non-interactive run cannot block.
unzip -o "$ARCHIVE" -d ./

if [[ ! -f $SOURCE_CSV ]]; then
  printf 'error: %s not found after extracting %s\n' "$SOURCE_CSV" "$ARCHIVE" >&2
  exit 1
fi

# NOTE(review): assumes line 1 of the CSV is the column header — confirm
# against the dataset. It is kept out of the shuffle and written as the first
# line of every output file instead of landing at a random position.
header=$(head -n 1 "$SOURCE_CSV")

{
  printf '%s\n' "$header"
  head -n "$CUTOFF" "$SOURCE_CSV" | tail -n +2 | shuf
} > heart_2020_sorted.csv

# Line 1 of the sorted file is the header, so shuffled row i sits on line i+1.
head -n "$((TEST_SIZE + 1))" heart_2020_sorted.csv > heart_2020_test.csv

{
  printf '%s\n' "$header"
  tail -n "+$((TRAIN_START + 1))" heart_2020_sorted.csv
} > heart_2020_train.csv