This commit is contained in:
patrycjalazna 2021-03-28 16:58:01 +02:00
parent c7945e9cf0
commit 2be81bd32d

View File

@@ -18,11 +18,14 @@ head -n -1 avocado-updated-2020.csv | shuf > avocado-updated-2020-shuf.csv
# Computes the row counts/offsets for a 6/2/2 train/dev/test split.
# The shell only does integer arithmetic, so 20% is computed as total / 5
# (rounded down); any remainder rows end up in the train set.
# Arguments:
#   $1 - total number of rows to split (treated as 0 when missing)
# Globals written:
#   TEST_SIZE  - number of rows in the test set (also the dev set size)
#   DEV_SIZE   - line number of the last dev row (test rows + dev rows)
#   TRAIN_SIZE - line number of the first train row (DEV_SIZE + 1)
compute_split_sizes() {
  local total=${1:-0}
  TEST_SIZE=$(( total / 5 ))
  DEV_SIZE=$(( 2 * TEST_SIZE ))
  TRAIN_SIZE=$(( DEV_SIZE + 1 ))
}

# Keep only the first $1 rows of the shuffled data set.
head -n "$1" avocado-updated-2020-shuf.csv > avocado-2020.csv
TRUNCATED_SIZE=$(wc -l avocado-2020.csv | awk '{print $1}')
echo "truncated size $TRUNCATED_SIZE"
compute_split_sizes "$TRUNCATED_SIZE"
# Split into train/dev/test in a 6/2/2 ratio:
#   test  = the first TEST_SIZE rows
#   dev   = the next TEST_SIZE rows (the tail of the first DEV_SIZE rows)
#   train = everything from row TRAIN_SIZE to the end
head -n "$TEST_SIZE" avocado-2020.csv > avocado-updated-2020-test.csv
head -n "$DEV_SIZE" avocado-2020.csv | tail -n "$TEST_SIZE" > avocado-updated-2020-dev.csv
tail -n "+$TRAIN_SIZE" avocado-2020.csv > avocado-updated-2020-train.csv
# Report the resulting line counts of the generated files.
wc -l avocado-updated-2020-*.csv