#!/bin/bash
#
# Download a Kaggle dataset, shuffle its lines, cut the result into disjoint
# train/dev/test splits and bundle the splits into artifacts.tar.gz.
#
# Required environment:
#   DATASET_ID    Dataset identifier handed to `kaggle datasets download -d`.
#   DATASET_FILE  Data file to shuffle and split. It is expected to exist in
#                 the current directory once the download has been extracted.
#
# Optional environment:
#   CUTOFF        When non-empty, the first CUTOFF lines of train.csv are
#                 additionally written to train_cutoff.csv and archived.
#
# Outputs (current directory):
#   train.csv        first 80000 shuffled lines
#   dev.csv          next 10000 shuffled lines
#   test.csv         all remaining shuffled lines
#   train_head.csv   first 1000 lines of train.csv
#   train_tail.csv   last 1000 lines of train.csv
#   train_cutoff.csv only when CUTOFF is set
#   artifacts.tar.gz archive of the files above
#
# The last line printed is the archive name, "artifacts.tar.gz".
#
# NOTE(review): the original one-liner used `split -l N input train.csv`, which
# treats "train.csv" as an output *prefix* (train.csvaa, train.csvab, ...) and
# never creates train.csv; it also renamed the complete shuffled file to
# test.csv. The disjoint 80000/10000/remainder partition below is the assumed
# intent -- confirm against whatever consumes the archive.

set -euo pipefail

: "${DATASET_ID:?DATASET_ID must be set}"
: "${DATASET_FILE:?DATASET_FILE must be set}"

readonly TRAIN_LINES=80000
readonly DEV_LINES=10000
readonly SAMPLE_LINES=1000
readonly SHUFFLED=shuffled_dataset.csv
readonly ARCHIVE=artifacts.tar.gz

# Drop the shuffled intermediate on every exit path, including failures.
cleanup() {
  rm -f -- "$SHUFFLED"
}
trap cleanup EXIT

pip install kaggle

# --unzip extracts the downloaded archive in place. The original ran `unzip`
# and `shuf` on the same $DATASET_FILE path, which cannot be both a zip archive
# and a text file; DATASET_FILE is treated here as the extracted data file.
kaggle datasets download -d "$DATASET_ID" --unzip

if [[ ! -f "$DATASET_FILE" ]]; then
  printf 'error: %s not found after downloading %s\n' \
    "$DATASET_FILE" "$DATASET_ID" >&2
  exit 1
fi

# NOTE(review): shuf treats every line alike, so a CSV header row (if the
# input has one) lands at a random position -- confirm the input is headerless.
shuf -- "$DATASET_FILE" > "$SHUFFLED"

# sed reads the whole input for each range, so no pipeline stage exits early
# (a `tail | head` pipeline can die of SIGPIPE and trip `set -o pipefail`).
# A range that lies past the end of the file simply yields an empty split.
dev_start=$((TRAIN_LINES + 1))
dev_end=$((TRAIN_LINES + DEV_LINES))
test_start=$((dev_end + 1))

sed -n "1,${TRAIN_LINES}p" "$SHUFFLED" > train.csv
sed -n "${dev_start},${dev_end}p" "$SHUFFLED" > dev.csv
sed -n "${test_start},\$p" "$SHUFFLED" > test.csv

head -n "$SAMPLE_LINES" train.csv > train_head.csv
tail -n "$SAMPLE_LINES" train.csv > train_tail.csv

# Archive only the files that were actually produced: train_cutoff.csv exists
# solely when CUTOFF is set, and tar fails on a missing member.
artifacts=(train.csv dev.csv test.csv train_head.csv train_tail.csv)
if [[ -n "${CUTOFF:-}" ]]; then
  head -n "$CUTOFF" train.csv > train_cutoff.csv
  artifacts+=(train_cutoff.csv)
fi

tar -czf "$ARCHIVE" "${artifacts[@]}"

# The raw download is no longer needed once the archive exists.
rm -f -- "$DATASET_FILE"

echo "artifacts.tar.gz"