ium_434765/get_data.sh

13 lines
559 B
Bash
Raw Normal View History

2021-03-27 17:28:33 +01:00
#!/bin/bash
# Downloads the Kaggle "youtube-trend-with-subscriber" dataset, strips blank
# lines from USvideos_modified.csv, writes shuffled test/dev/train splits
# (data_test, data_dev, data_train) and finally runs get_data.py on the CSV.
#
# Usage: ./get_data.sh
# Requires: kaggle CLI, unzip, python3 and GNU coreutils (head -n -N, shuf).

#######################################
# Remove empty lines from a file in place.
# Arguments: $1 - path of the file to rewrite
# Returns:   0 on success, non-zero if the file could not be filtered
#######################################
strip_blank_lines() {
  local file=$1
  local tmp
  tmp=$(mktemp "${file}.XXXXXX") || return
  # grep exits 1 when it selects no lines (file empty or all blank); that is
  # still a valid result. Any status above 1 is a real error.
  grep -v -e '^$' -- "${file}" > "${tmp}" || [[ $? -eq 1 ]] || {
    rm -f -- "${tmp}"
    return 1
  }
  # Write back through the original path so its permissions are preserved.
  cat -- "${tmp}" > "${file}"
  local status=$?
  rm -f -- "${tmp}"
  return "${status}"
}

# Drop any copy left by a previous run; -f keeps a first run quiet.
rm -f -- USvideos_modified.csv

if kaggle datasets download -d sgonkaggle/youtube-trend-with-subscriber && unzip youtube-trend-with-subscriber.zip; then
  # The earlier `grep -v -e "^$" - FILE` read stdin and only printed to
  # stdout, so the CSV was never actually filtered.
  strip_blank_lines "USvideos_modified.csv"

  COUNT=$(wc -l "USvideos_modified.csv")
  echo "${COUNT}"

  # NOTE(review): `head -n -1` drops the LAST line, so the first line of the
  # CSV is shuffled in with the rest — confirm this is intended.
  head -n -1 "USvideos_modified.csv" | shuf > "data_shuf"

  # Lines 1-544 -> test, lines 545-1088 -> dev.
  head -n 544 "data_shuf" > "data_test"
  head -n 1088 "data_shuf" | tail -n 544 > "data_dev"
  # Last 4352 of the first 5441 lines. This starts at line 1089 only when
  # data_shuf has at most 5440 lines; with more, line 1089 is left unused.
  head -n 5441 "data_shuf" | tail -n 4352 > "data_train"

  # The earlier `sed '/^\n$/d'` could never match (sed's pattern space has no
  # trailing newline) and merely dumped data_dev to stdout.
  strip_blank_lines "data_dev"

  python3 get_data.py USvideos_modified.csv
fi