ium_464979/kuggle_download.sh

31 lines
673 B
Bash
Raw Normal View History

#!/bin/bash
#
# Download the Beer Advocate reviews dataset from Kaggle, shuffle it, carve it
# into train/dev/test CSV files plus small train samples, and pack everything
# into artifacts.tar.gz.
#
# Environment:
#   CUTOFF        optional; when non-empty, the first $CUTOFF lines of
#                 train.csv are also written to train_cutoff.csv and archived.
#   DATASET_FILE  optional; when non-empty, this path is deleted during cleanup.
#
# Row layout of the shuffled file (all three sets are disjoint):
#   lines 1 .. DEV_ROWS                  -> dev.csv
#   lines DEV_ROWS+1 .. CARVE_ROWS       -> train.csv
#   lines CARVE_ROWS+1 .. end            -> test.csv
#
# Output: prints the working directory, a directory listing, and finally the
# archive name "artifacts.tar.gz" on stdout.

# Stop at the first failing step instead of archiving partial/missing files.
set -euo pipefail

readonly DATASET_SLUG='thedevastator/1-5-million-beer-reviews-from-beer-advocate'
readonly DATASET_ZIP='1-5-million-beer-reviews-from-beer-advocate.zip'
readonly CARVE_ROWS=80000   # lines taken off the top of the shuffled file
readonly DEV_ROWS=10000     # of those, how many go to dev.csv
readonly SAMPLE_ROWS=1000   # size of the train head/tail samples

pip install kaggle
kaggle datasets download -d "$DATASET_SLUG"
unzip -o "$DATASET_ZIP"

# NOTE(review): shuf permutes every line, including line 1. If beer_reviews.csv
# starts with a header row, that row lands at a random position — confirm and
# handle separately if downstream readers expect a header.
shuf beer_reviews.csv > shuffled_dataset.csv
pwd
ls -a

# `split -l N in PREFIX` would write PREFIXaa, PREFIXab, ... rather than a file
# literally named PREFIX, so the pieces are cut with head/sed/tail instead.
head -n "$DEV_ROWS" shuffled_dataset.csv > dev.csv
# The trailing `q` stops sed at CARVE_ROWS so it does not scan the whole file.
sed -n "$((DEV_ROWS + 1)),${CARVE_ROWS}p;${CARVE_ROWS}q" shuffled_dataset.csv > train.csv
tail -n "+$((CARVE_ROWS + 1))" shuffled_dataset.csv > test.csv

head -n "$SAMPLE_ROWS" train.csv > train_head.csv
tail -n "$SAMPLE_ROWS" train.csv > train_tail.csv

artifacts=(train.csv dev.csv test.csv train_head.csv train_tail.csv)

# train_cutoff.csv is optional, so it is archived only when it was produced.
if [[ -n "${CUTOFF:-}" ]]; then
  head -n "$CUTOFF" train.csv > train_cutoff.csv
  artifacts+=(train_cutoff.csv)
fi

tar -czf artifacts.tar.gz "${artifacts[@]}"

# Cleanup: drop the intermediate shuffled file and, if the caller named one,
# the file pointed to by DATASET_FILE.
rm -f -- shuffled_dataset.csv
if [[ -n "${DATASET_FILE:-}" ]]; then
  rm -f -- "$DATASET_FILE"
fi

echo "artifacts.tar.gz"