preprocessing script added
This commit is contained in:
parent
9226d17c20
commit
ece8db04cf
38
avocado-preprocessing.sh
Executable file
38
avocado-preprocessing.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env bash
# Task 5

# Download the dataset archive from Kaggle and unpack it in the current
# directory. Abort on failure so the later steps never run against a missing
# or stale CSV (and never delete files based on a failed download).
kaggle datasets download -d timmate/avocado-prices-2020 || {
  echo "error: kaggle download of timmate/avocado-prices-2020 failed" >&2
  exit 1
}

# -o: overwrite files from a previous run without prompting.
unzip -o avocado-prices-2020.zip || {
  echo "error: could not unpack avocado-prices-2020.zip" >&2
  exit 1
}

# Sanity check: print the number of lines in the unpacked CSV.
wc -l avocado-updated-2020.csv
|
||||
|
||||
# File processing: report (with line numbers) any completely empty lines in
# the CSV. This only prints matches; it does not modify the file.
grep --line-number --perl-regexp '^$' avocado-updated-2020.csv
|
||||
|
||||
# Remove the redundant "year" column, save the header row, and shuffle the
# data rows.
#
# Arguments:
#   $1 - input CSV (comma-separated, first line is the header)
#   $2 - output file that receives the header row only
#   $3 - output file that receives the shuffled data rows (no header)
# Returns: 0 on success, non-zero if the input is unreadable or a step fails.
#
# Field 12 is dropped; per the original comment this is the "year" column.
# NOTE(review): fields are split on every comma, so this assumes no quoted
# field contains a comma - confirm against the dataset.
strip_year_and_shuffle() {
  local src="$1" header_out="$2" shuffled_out="$3"
  local trimmed status=0

  if [ ! -r "$src" ]; then
    echo "error: cannot read $src" >&2
    return 1
  fi

  trimmed=$(mktemp) || return 1

  # The delimiter must be given explicitly and the result must be captured;
  # otherwise the column is never actually removed from the data used below.
  if ! cut -d ',' -f 12 --complement "$src" > "$trimmed"; then
    rm -f "$trimmed"
    return 1
  fi

  head -n 1 "$trimmed" > "$header_out" || status=1
  # tail -n +2 skips the header row, so only data rows are shuffled and no
  # data row is lost.
  tail -n +2 "$trimmed" | shuf > "$shuffled_out" || status=1

  rm -f "$trimmed"
  return "$status"
}

strip_year_and_shuffle avocado-updated-2020.csv header.csv avocado-updated-2020-shuf.csv
|
||||
|
||||
# Split the shuffled rows into test / dev / train parts.
#
# Arguments:
#   $1 - shuffled input file (data rows)
#   $2 - output prefix; writes <prefix>-test.csv, <prefix>-dev.csv and
#        <prefix>-train.csv
#   $3 - number of rows in each of the test and dev parts (default: 6609)
# Layout: rows 1..N go to test, rows N+1..2N to dev, the remainder to train.
# Returns: non-zero if the input file is not readable.
#
# Uses plain `head` rather than the g-prefixed `ghead`, which only exists
# where GNU coreutils is installed under a prefix.
split_dataset() {
  local shuffled="$1" prefix="$2" part="${3:-6609}"

  if [ ! -r "$shuffled" ]; then
    echo "error: cannot read $shuffled" >&2
    return 1
  fi

  head -n "$part" "$shuffled" > "${prefix}-test.csv"
  head -n "$((2 * part))" "$shuffled" | tail -n "$part" > "${prefix}-dev.csv"
  tail -n "+$((2 * part + 1))" "$shuffled" > "${prefix}-train.csv"
}

split_dataset avocado-updated-2020-shuf.csv avocado-updated-2020

# Sanity check: print the line count of every intermediate file.
wc -l avocado-updated-2020-*.csv
|
||||
|
||||
# Prepend the saved header row to each part to produce the final
# test.csv, dev.csv and train.csv.
for split in test dev train; do
  cat header.csv "avocado-updated-2020-${split}.csv" > "${split}.csv"
done
|
||||
|
||||
# Remove the helper files: the per-split intermediates, the shuffled data,
# the downloaded archive and the saved header.
for leftover in \
  avocado-updated-2020-test.csv \
  avocado-updated-2020-dev.csv \
  avocado-updated-2020-train.csv \
  avocado-updated-2020-shuf.csv \
  avocado-prices-2020.zip \
  header.csv; do
  rm "$leftover"
done
|
||||
|
||||
|
||||
# # 7. Task parameters [1 pt]
|
||||
# head -n $1 data.shuffled > zadanie7.data
|
||||
# cat header.csv zadanie7.data > zadanie7.csv
|
Loading…
Reference in New Issue
Block a user