#!/bin/bash
#
# Fetch the Kaggle dataset "gulczas/spotify-dataset", shuffle its lines, keep
# the first CUTOFF shuffled lines and split them into train / test /
# validation CSV files, which are then moved into ./artifacts/.
#
# Usage: <this-script> CUTOFF
#   CUTOFF  non-negative integer: number of shuffled lines to keep.
#
# Split sizes (integer arithmetic on the number of kept lines):
#   test       = total / 10
#   validation = total / 10
#   train      = total - 2 * (total / 10)
#
# Files left in the working directory: spotify-dataset.zip,
# shuffled_spotify.csv. Files moved to artifacts/: Spotify_Dataset.csv,
# cutoff_spotify.csv, train.csv, validation.csv, test.csv.
#
# NOTE(review): the kaggle CLI presumably needs API credentials configured in
# the environment - confirm for the machine running this.
# NOTE(review): shuf treats every line alike, so if Spotify_Dataset.csv starts
# with a header row it is shuffled in with the data - confirm this is what the
# downstream consumers expect.

# Abort on the first failing command, on unset variables and on failures
# inside pipelines, so later steps never run on missing or stale files.
set -euo pipefail

# Print a message to stderr and terminate with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ $# -ne 1 ]]; then
  printf 'Usage: %s CUTOFF\n' "${0##*/}" >&2
  exit 2
fi

cutoff=$1
[[ "$cutoff" =~ ^[0-9]+$ ]] \
  || die "CUTOFF must be a non-negative integer, got: '$cutoff'"

pip install kaggle --upgrade
kaggle datasets download -d gulczas/spotify-dataset
unzip -o spotify-dataset.zip

echo "------------------ Shufle ------------------"
shuf Spotify_Dataset.csv -o shuffled_spotify.csv

echo "------------------ Cut off to top $cutoff rows ------------------"
head -n "$cutoff" shuffled_spotify.csv > cutoff_spotify.csv

echo "------------------ Split ------------------"
total_lines=$(wc -l < cutoff_spotify.csv)
num_test=$((total_lines / 10))
num_validation=$num_test
num_train=$((total_lines - (num_test * 2)))

# Train: the first num_train lines.
head -n "$num_train" cutoff_spotify.csv > train.csv

# Test: the num_test lines that immediately follow the train portion. A single
# awk pass is used instead of `tail | head` so that no pipeline stage can be
# killed by SIGPIPE (which would abort the script under pipefail).
awk -v first="$((num_train + 1))" -v last="$((num_train + num_test))" \
  'NR > last { exit } NR >= first' cutoff_spotify.csv > test.csv

# Validation: the final num_validation lines.
tail -n "$num_validation" cutoff_spotify.csv > validation.csv

mkdir -p artifacts
mv -- Spotify_Dataset.csv cutoff_spotify.csv train.csv validation.csv test.csv \
  artifacts/