Fix the validation/test split in download_dataset.sh.

Because num_validation is set equal to num_test, the removed lines piped the
last num_test rows through `head -n $num_validation`, which sent every one of
them to validation.csv, and through `tail -n +$((num_validation + 1))`, which
starts past the end of that stream and therefore wrote nothing to test.csv.
The added lines take the last num_test + num_validation rows, write the first
num_test of them to test.csv, and write the final num_validation rows to
validation.csv.

NOTE(review): the line structure of this patch was restored from a
single-line copy. The two blank context lines are placed by inference from
the 8-line hunk counts; confirm against the original patch.

diff --git a/download_dataset.sh b/download_dataset.sh
index dc9259b..69769c9 100644
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -19,8 +19,8 @@ num_train=$((total_lines - (num_test * 2)))
 num_validation=$num_test
 
 head -n $num_train cutoff_spotify.csv > train.csv
-tail -n $num_test cutoff_spotify.csv | head -n $num_validation > validation.csv
-tail -n $num_test cutoff_spotify.csv | tail -n +$(($num_validation + 1)) > test.csv
+tail -n $((num_test+num_validation)) cutoff_spotify.csv | head -n $num_test > test.csv
+tail -n $num_validation cutoff_spotify.csv > validation.csv
 
 mkdir -p artifacts
 mv Spotify_Dataset.csv cutoff_spotify.csv train.csv validation.csv test.csv artifacts/