ium_478841/scripts/load_data.sh
2022-04-03 20:17:21 +02:00

29 lines
850 B
Bash

#!/bin/bash
#
# Download the Kaggle "avocado-prices" dataset and place avocado.csv in ./data.
#
# Requires: the `kaggle` CLI (with credentials configured) and `unzip`.
# Optional: `figlet` (banner only) and the KAGGLE_USERNAME environment
#           variable (only used in the greeting).
#
# The script is safe to re-run: stale outputs are removed, the archive is
# overwritten on extraction, and the data directory is created only if missing.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so "Done" is only printed on real success.
set -euo pipefail

readonly DATASET="neuromusic/avocado-prices"
readonly ARCHIVE="avocado-prices.zip"
readonly DATA_DIR="data"

# The banner is cosmetic: fall back to plain echo when figlet is missing, and
# tolerate an unset KAGGLE_USERNAME (which would otherwise trip `set -u`).
greeting="Welcome ${KAGGLE_USERNAME:-}"
if command -v figlet >/dev/null 2>&1; then
  figlet "$greeting"
else
  echo "$greeting"
fi

# Clean the previous files.
# -f: do not fail when there is nothing to remove (e.g. on the first run).
rm -rf -- avocado.data*
echo "Removed previous data files"

# Install kaggle and python modules
# pip3 install --user kaggle
# pip3 install --user pandas

# Download the data
echo "Loading dataset..."
kaggle datasets download -d "$DATASET"

echo "Extracting files from zip archive..."
unzip -o "$ARCHIVE"
# Remove the archive that was just extracted (same file name as unzip above).
rm -f -- "$ARCHIVE"

# -p: succeed when the directory already exists from a previous run.
mkdir -p -- "$DATA_DIR"
mv -- avocado.csv "$DATA_DIR/"
echo Done

# Dividing data (currently disabled)
# NOTE(review): avocado.csv has been moved into data/ by this point, so these
# commands would need to read data/avocado.csv if they are re-enabled.
# echo "Start the data splitting..."
# tail -n +2 avocado.csv | shuf > avocado_shuf.csv
# head -n 14000 avocado_shuf.csv > avocado.data.train
# tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
# tail -n 2000 avocado_shuf.csv > avocado.data.test

# Saving simple stats in a text file (currently disabled)
# echo "Getting simple stats..."
# wc -l avocado.data* > results.txt