This commit is contained in:
patrycjalazna 2021-03-28 16:44:27 +02:00
parent f9386a6a32
commit ae19f828c0

View File

@@ -3,8 +3,8 @@
# pobranie zbioru danych # pobranie zbioru danych
kaggle datasets download -d timmate/avocado-prices-2020 kaggle datasets download -d timmate/avocado-prices-2020
unzip -o avocado-prices-2020.zip unzip -o avocado-prices-2020.zip
total_size = $(wc -l avocado-updated-2020.csv) TOTAL_SIZE= $(wc -l avocado-updated-2020.csv)
echo size $total_size echo size $TOTAL_SIZE
# przetwarzanie pliku # przetwarzanie pliku
grep -P "^$" -n avocado-updated-2020.csv grep -P "^$" -n avocado-updated-2020.csv
@@ -16,8 +16,8 @@ head -n 1 avocado-updated-2020.csv > header.csv
head -n -1 avocado-updated-2020.csv | shuf > avocado-updated-2020-shuf.csv head -n -1 avocado-updated-2020.csv | shuf > avocado-updated-2020-shuf.csv
head -n "$1" avocado-updated-2020-shuf.csv > avocado-2020.csv head -n "$1" avocado-updated-2020-shuf.csv > avocado-2020.csv
truncated_size = $(wc -l avocado-2020.csv) TRUNCATED_SIZE = $(wc -l avocado-2020.csv)
echo truncated size $truncated_size echo truncated size $TRUNCATED_SIZE
# podzial na train/dev/test 6/2/2 # podzial na train/dev/test 6/2/2
head -n 6609 avocado-2020.csv > avocado-updated-2020-test.csv head -n 6609 avocado-2020.csv > avocado-updated-2020-test.csv