#!/bin/bash

# Print a welcome banner containing $KAGGLE_USERNAME (read from the
# environment; expands to an empty string when unset).
# figlet is optional: when it is not installed, fall back to a plain-text
# greeting instead of failing with "command not found".
if command -v figlet >/dev/null 2>&1; then
  figlet "Welcome $KAGGLE_USERNAME"
else
  echo "Welcome $KAGGLE_USERNAME"
fi
# Clean the previous files
# -f keeps a first run quiet and successful when no avocado.data* entries
# exist yet; -- stops option parsing before the glob expansion.
rm -rf -- avocado.data*
echo "Removed previous data files"
# Install Kaggle and Python modules
# pip3 install --user kaggle
# pip3 install --user pandas
# Download the data
# Fetch the neuromusic/avocado-prices dataset with the kaggle CLI.
# Abort on failure so later steps do not run against a missing archive.
echo "Loading dataset..."
if ! kaggle datasets download -d neuromusic/avocado-prices; then
  echo "Failed to download dataset neuromusic/avocado-prices" >&2
  exit 1
fi
# Unpack avocado-prices.zip from the current directory, delete the archive,
# and move the extracted avocado.csv into ./data.
echo "Extracting files from zip archive..."
# Only delete the archive after a successful extraction, so a failed unzip
# does not throw away the download.
if ! unzip -o avocado-prices.zip; then
  echo "Failed to extract avocado-prices.zip" >&2
  exit 1
fi
# The archive name must match the one passed to unzip above.
rm -- avocado-prices.zip
# -p makes re-runs succeed when data/ already exists.
mkdir -p data
mv -- avocado.csv data/
echo Done
# Dividing data
# echo "Start the data splitting..."
# tail -n +2 avocado.csv | shuf > avocado_shuf.csv
# head -n 14000 avocado_shuf.csv > avocado.data.train
# tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
# tail -n 2000 avocado_shuf.csv > avocado.data.test
# Saving simple stats in a text file
# echo "Getting simple stats..."
# wc -l avocado.data* > results.txt