NOTE(review): line breaks were reconstructed from a copy of this patch that had
been collapsed onto a single line. "pip install kaggle" is assumed to be the
first context line so that the hunk matches its declared 7-line length; it could
instead be the hunk heading followed by a lost blank context line. Confirm
against the repository before applying.

The change itself: the first argument of `tail -n` goes from +"${CUTOFF}" to
+$((${CUTOFF}+1)). `tail -n +K` starts output at line K, so the new form drops
the first CUTOFF shuffled lines instead of the first CUTOFF-1.

diff --git a/data_download.sh b/data_download.sh
index b1f5360..d10be49 100755
--- a/data_download.sh
+++ b/data_download.sh
@@ -3,7 +3,7 @@
 pip install kaggle
 kaggle datasets download ruchi798/movies-on-netflix-prime-video-hulu-and-disney --unzip
 sed -i '/^$/d' ./MoviesOnStreamingPlatforms_updated.csv
-shuf ./MoviesOnStreamingPlatforms_updated.csv | tail -n +"${CUTOFF}" > ./MoviesOnStreamingPlatforms_updated.shuf
+shuf ./MoviesOnStreamingPlatforms_updated.csv | tail -n +$((${CUTOFF}+1)) > ./MoviesOnStreamingPlatforms_updated.shuf
 head -n 1674 ./MoviesOnStreamingPlatforms_updated.shuf > ./MoviesOnStreamingPlatforms_updated.test
 head -n 3348 ./MoviesOnStreamingPlatforms_updated.shuf | tail -n 1674 > ./MoviesOnStreamingPlatforms_updated.dev
 tail -n +3349 ./MoviesOnStreamingPlatforms_updated.shuf > ./MoviesOnStreamingPlatforms_updated.train