ium_478841/scripts/load_data.sh

29 lines
850 B
Bash
Raw Normal View History

2022-04-03 11:34:18 +02:00
#!/bin/bash
# Fetches the neuromusic/avocado-prices dataset with the Kaggle CLI, unpacks
# the downloaded archive and moves avocado.csv into ./data.
# Commands invoked: figlet, kaggle, unzip.
# Environment: $KAGGLE_USERNAME is read for the welcome banner.
2022-03-27 23:34:18 +02:00
# Print a welcome banner. The banner is purely cosmetic, so fall back to a
# plain echo when figlet is not installed instead of emitting a
# "command not found" error.
if command -v figlet >/dev/null 2>&1; then
  figlet "Welcome $KAGGLE_USERNAME"
else
  echo "Welcome $KAGGLE_USERNAME"
fi
2022-04-03 11:34:18 +02:00
# Clean the previous files
2022-03-27 23:34:18 +02:00
# -f: when no avocado.data* file exists (e.g. on a first run) the glob stays
# unexpanded; without -f rm would report an error for the literal pattern.
# --: stop option parsing before the expanded file names.
rm -rf -- avocado.data*
2022-04-03 11:34:18 +02:00
# Progress message for the cleanup step.
printf '%s\n' "Removed previous data files"
# Install kaggle and python modules
# pip3 install --user kaggle
# pip3 install --user pandas
2022-03-27 23:34:18 +02:00
2022-04-03 11:34:18 +02:00
# Fetch the dataset archive
printf '%s\n' "Loading dataset..."
2022-03-27 23:34:18 +02:00
# Abort when the download fails instead of carrying on to unzip an archive
# that was never fetched.
if ! kaggle datasets download -d neuromusic/avocado-prices; then
  echo "Failed to download neuromusic/avocado-prices" >&2
  exit 1
fi
2022-04-03 11:34:18 +02:00
# Progress message for the extraction step.
printf '%s\n' "Extracting files from zip archive..."
2022-03-27 23:34:18 +02:00
# -o overwrites files left over from an earlier extraction without prompting.
# Stop on failure so the following steps never run without a freshly
# extracted avocado.csv.
if ! unzip -o avocado-prices.zip; then
  echo "Failed to extract avocado-prices.zip" >&2
  exit 1
fi
2022-04-03 20:17:21 +02:00
# Remove the downloaded archive once its contents are extracted. The archive
# is avocado-prices.zip (the name unzip was given), not "avocado-prizes.zip".
rm avocado-prices.zip
# -p: data/ may already exist from a previous run; do not fail in that case.
mkdir -p data
mv avocado.csv data/
2022-04-03 19:39:46 +02:00
# Final progress message.
printf '%s\n' "Done"
2022-04-03 11:34:18 +02:00
# Dividing data
2022-04-03 19:39:46 +02:00
# echo "Start the data splitting..."
# tail -n +2 avocado.csv | shuf > avocado_shuf.csv
# head -n 14000 avocado_shuf.csv > avocado.data.train
# tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
# tail -n 2000 avocado_shuf.csv > avocado.data.test
2022-03-27 23:34:18 +02:00
2022-04-03 11:34:18 +02:00
# Saving simple stats in a text file
2022-04-03 19:39:46 +02:00
# echo "Getting simple stats..."
# wc -l avocado.data* > results.txt