#!/bin/bash
#
# Download the Kaggle "neuromusic/avocado-prices" dataset and unpack
# avocado.csv into ./data.
#
# Requirements:
#   - kaggle CLI on PATH (see the commented pip3 lines below)
#   - unzip
#   - figlet (optional; only used for the welcome banner)
#
# Environment:
#   KAGGLE_USERNAME  name shown in the welcome banner (may be unset)

# Abort on the first failing step so "Done" is never printed after a
# failed download or extraction.
set -euo pipefail

readonly DATASET="neuromusic/avocado-prices"
readonly ARCHIVE="avocado-prices.zip"
readonly CSV_FILE="avocado.csv"
readonly DATA_DIR="data"

# The banner is cosmetic: fall back to plain echo when figlet is missing
# instead of aborting the whole script.
banner="Welcome ${KAGGLE_USERNAME:-}"
if command -v figlet >/dev/null 2>&1; then
  figlet "$banner"
else
  echo "$banner"
fi

# Clean the previous files.
# -f keeps a first run (nothing to delete yet) from failing.
rm -rf -- avocado.data*
echo "Removed previous data files"

# Install kaggle and python modules
# pip3 install --user kaggle
# pip3 install --user pandas

# Download the data
echo "Loading dataset..."
kaggle datasets download -d "$DATASET"

echo "Extracting files from zip archive..."
unzip -o "$ARCHIVE"
# Remove the archive that was just extracted (the previous version of this
# script misspelled the name here, so the zip was never cleaned up).
rm -f -- "$ARCHIVE"

# -p makes re-runs succeed when the directory already exists.
mkdir -p -- "$DATA_DIR"
mv -- "$CSV_FILE" "$DATA_DIR/"
echo Done

# Dividing data
# NOTE: avocado.csv has been moved into data/ by this point; adjust the
# input path below before re-enabling these steps.
# echo "Start the data splitting..."
# tail -n +2 avocado.csv | shuf > avocado_shuf.csv
# head -n 14000 avocado_shuf.csv > avocado.data.train
# tail -n +14001 avocado_shuf.csv | head -n 2249 > avocado.data.valid
# tail -n 2000 avocado_shuf.csv > avocado.data.test

# Saving simple stats in a text file
# echo "Getting simple stats..."
# wc -l avocado.data* > results.txt