diff --git a/get_data_simple.sh b/get_data_simple.sh
index 4c055fb..ade81eb 100644
--- a/get_data_simple.sh
+++ b/get_data_simple.sh
@@ -5,15 +5,15 @@ if kaggle datasets download -d sgonkaggle/youtube-trend-with-subscriber && unzip
  COUNT=$(wc -l "USvideos_modified.csv")
  echo "${COUNT}"
  head -n -1 "USvideos_modified.csv" | shuf > "data_shuf"
- head -n 544 data_shuf > data_test
- head -n 1088 data_shuf | tail -n 544 > data_dev
- head -n +1089 data_shuf > data_train
+ head -n 544 "data_shuf" > "data_test"
+ head -n 1088 "data_shuf" | tail -n 544 > "data_dev"
+ head -n +1089 "data_shuf" > "data_train"
  echo "Shuffled dataset"
- wc -l data_shuf
+ wc -l "data_shuf"
  echo "Test dataset"
- wc -l data_test
+ wc -l "data_test"
  echo "Dev dataset"
- wc -l data_dev
+ wc -l "data_dev"
  echo "Train dataset"
- wc -l data_train
+ wc -l "data_train"
 fi
\ No newline at end of file