cutoff
This commit is contained in:
parent
b6c1bb0227
commit
c7945e9cf0
@ -3,7 +3,7 @@
|
||||
# download the dataset
|
||||
kaggle datasets download -d timmate/avocado-prices-2020
|
||||
unzip -o avocado-prices-2020.zip
|
||||
TOTAL_SIZE=$(wc -l avocado-updated-2020.csv)
|
||||
TOTAL_SIZE=$(wc -l avocado-updated-2020.csv | awk '{print $1}')
|
||||
echo "size $TOTAL_SIZE"
|
||||
|
||||
# process the file
|
||||
@ -16,13 +16,13 @@ head -n 1 avocado-updated-2020.csv > header.csv
|
||||
# Shuffle the data rows only. The header line is written to header.csv
# separately, so skip line 1 here; `head -n -1` would instead drop the last
# record and leave the header mixed into the shuffled rows.
tail -n +2 avocado-updated-2020.csv | shuf > avocado-updated-2020-shuf.csv
|
||||
|
||||
head -n "$1" avocado-updated-2020-shuf.csv > avocado-2020.csv
|
||||
TRUNCATED_SIZE=$(wc -l avocado-2020.csv)
|
||||
TRUNCATED_SIZE=$(wc -l avocado-2020.csv | awk '{print $1}')
|
||||
echo "truncated size $TRUNCATED_SIZE"
|
||||
|
||||
# split into train/dev/test 6/2/2
|
||||
head -n 6609 avocado-2020.csv > avocado-updated-2020-test.csv
|
||||
head -n 13218 avocado-2020.csv | tail -n 6609 > avocado-updated-2020-dev.csv
|
||||
tail -n +13219 avocado-2020.csv > avocado-updated-2020-train.csv
|
||||
# Test split: the first 20% of the rows. Shell arithmetic is integer-only and
# must be wrapped in $(( )), so 0.2 * N is written as N * 2 / 10 (rounded down).
head -n "$(( TRUNCATED_SIZE * 2 / 10 ))" avocado-2020.csv > avocado-updated-2020-test.csv
|
||||
# Dev split: the second 20% chunk. Keep the first 2 * floor(0.2 * N) rows and
# take the last floor(0.2 * N) of them; doubling the rounded chunk size (rather
# than rounding 0.4 * N) keeps the dev rows directly after the test rows.
head -n "$(( 2 * (TRUNCATED_SIZE * 2 / 10) ))" avocado-2020.csv | tail -n "$(( TRUNCATED_SIZE * 2 / 10 ))" > avocado-updated-2020-dev.csv
|
||||
# Train split: every row after the two 20% chunks (the remaining ~60%).
# `tail -n +K` starts output at line K, hence the + 1.
tail -n "+$(( 2 * (TRUNCATED_SIZE * 2 / 10) + 1 ))" avocado-2020.csv > avocado-updated-2020-train.csv
|
||||
|
||||
wc -l avocado-updated-2020-*.csv
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user