#!/usr/bin/env bash
# Task 5: download the Kaggle "avocado-prices-2020" dataset, drop the
# redundant `year` column, shuffle the records and split them into
# test/dev/train CSV files that each start with the header row.
#
# Requires: kaggle CLI (with configured credentials), unzip, shuf.
# Outputs (current directory): test.csv, dev.csv, train.csv.
set -euo pipefail

readonly DATASET='timmate/avocado-prices-2020'
readonly ARCHIVE='avocado-prices-2020.zip'
readonly RAW='avocado-updated-2020.csv'
readonly PREFIX='avocado-updated-2020'

# Intermediate files; all of them are removed by cleanup().
readonly NO_YEAR="${PREFIX}-noyear.csv"
readonly SHUFFLED="${PREFIX}-shuf.csv"
readonly TEST_PART="${PREFIX}-test.csv"
readonly DEV_PART="${PREFIX}-dev.csv"
readonly TRAIN_PART="${PREFIX}-train.csv"
readonly HEADER='header.csv'

# Number of records in the test and dev splits; the rest goes to train.
readonly TEST_SIZE=6609
readonly DEV_SIZE=6609

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Remove helper files (and the downloaded archive) on every exit path.
cleanup() {
  rm -f -- "$TEST_PART" "$DEV_PART" "$TRAIN_PART" \
    "$SHUFFLED" "$NO_YEAR" "$HEADER" "$ARCHIVE"
}
trap cleanup EXIT

# Download the dataset.
kaggle datasets download -d "$DATASET"
unzip -o "$ARCHIVE"
wc -l "$RAW"

# File processing: report empty lines, if any. grep exits with status 1
# when nothing matches, which must not abort the script under `set -e`.
grep -n '^$' "$RAW" || true

# Remove the redundant `year` column (field 12). `-f1-11,13-` selects the
# same fields as GNU `-f12 --complement` but also works with non-GNU cut.
# The result is stored so that the following steps actually use it.
cut -d ',' -f1-11,13- "$RAW" > "$NO_YEAR"

# Keep the header aside and shuffle only the data rows. `tail -n +2`
# skips the first (header) line; `head -n -1` would instead drop the last
# record and leave the header mixed into the shuffled data.
head -n 1 "$NO_YEAR" > "$HEADER"
tail -n +2 "$NO_YEAR" | shuf > "$SHUFFLED"

# Guard against overlapping/empty splits when the dataset is too small.
rows=$(wc -l < "$SHUFFLED")
(( rows > TEST_SIZE + DEV_SIZE )) \
  || die "not enough rows (${rows}) for the requested test/dev sizes"

# Split into test/dev/train.
head -n "$TEST_SIZE" "$SHUFFLED" > "$TEST_PART"
head -n "$((TEST_SIZE + DEV_SIZE))" "$SHUFFLED" \
  | tail -n "$DEV_SIZE" > "$DEV_PART"
tail -n "+$((TEST_SIZE + DEV_SIZE + 1))" "$SHUFFLED" > "$TRAIN_PART"
wc -l "$TEST_PART" "$DEV_PART" "$TRAIN_PART"

# Prepend the header to every split.
cat "$HEADER" "$TEST_PART" > test.csv
cat "$HEADER" "$DEV_PART" > dev.csv
cat "$HEADER" "$TRAIN_PART" > train.csv

# Helper files are removed by the EXIT trap (see cleanup above).