2021-03-27 17:28:33 +01:00
|
|
|
#!/bin/bash
#
# Downloads a Kaggle dataset, prints a short preview of its CSV, shuffles the
# rows and splits them into test / dev / train files, then runs main.py on the
# original CSV.
#
# Requires: kaggle CLI, unzip, GNU head (for `head -n -1`), shuf, python.
# Outputs:  data_shuf, data_test, data_dev and data_train in the current
#           directory, plus line counts and a preview on stdout.
# Returns:  non-zero if the download/extraction or any later step fails.

set -euo pipefail

readonly DATASET_SLUG="sgonkaggle/youtube-trend-with-subscriber"
readonly DATASET_ZIP="youtube-trend-with-subscriber.zip"
readonly DATASET_CSV="USvideos_modified.csv"
# Number of lines in each of the test and dev splits; train gets the rest.
readonly SPLIT_SIZE=544

# unzip -o overwrites files left by an earlier run instead of stopping at the
# interactive "replace?" prompt.
if kaggle datasets download -d "${DATASET_SLUG}" && unzip -o "${DATASET_ZIP}"; then
  # Preview the first two lines of the CSV.
  head -n 2 "${DATASET_CSV}"

  # Print every non-empty line. Only the file is passed as an operand: an
  # extra "-" operand would make grep read standard input as well and block
  # when the script is started from a terminal.
  grep -v -e "^$" "${DATASET_CSV}"

  # Line count of the raw CSV (printed as "<count> <file>").
  wc -l "${DATASET_CSV}"

  # Shuffle everything except the last line of the CSV.
  # NOTE(review): `head -n -1` drops the LAST line, not the first. If the first
  # line is a header row it ends up shuffled in with the data -- confirm
  # whether `tail -n +2` was intended here.
  head -n -1 "${DATASET_CSV}" | shuf > "data_shuf"

  # Disjoint splits of the shuffled file:
  #   test  = lines 1 .. SPLIT_SIZE
  #   dev   = lines SPLIT_SIZE+1 .. 2*SPLIT_SIZE
  #   train = lines 2*SPLIT_SIZE+1 .. end
  head -n "${SPLIT_SIZE}" "data_shuf" > "data_test"
  # A fixed line range keeps dev disjoint from test even for short inputs.
  sed -n "$((SPLIT_SIZE + 1)),$((2 * SPLIT_SIZE))p" "data_shuf" > "data_dev"
  # tail -n +K starts AT line K. `head -n +K` would instead emit the first K
  # lines, i.e. the test and dev rows all over again.
  tail -n "+$((2 * SPLIT_SIZE + 1))" "data_shuf" > "data_train"

  echo "Shuffled dataset"
  wc -l "data_shuf"

  echo "Test dataset"
  wc -l "data_test"

  echo "Dev dataset"
  wc -l "data_dev"

  echo "Train dataset"
  wc -l "data_train"

  python main.py "${DATASET_CSV}"
else
  # Make a failed download/extraction visible to the caller instead of
  # finishing silently with status 0.
  printf 'error: could not download or unzip %s\n' "${DATASET_SLUG}" >&2
  exit 1
fi
|