ium_434765/get_data_simple.sh

13 lines
480 B
Bash
Raw Permalink Normal View History

2021-03-27 18:48:24 +01:00
#!/bin/bash
#
# Download the sgonkaggle/youtube-trend-with-subscriber dataset with the
# kaggle CLI, unpack it, print a short preview of USvideos_modified.csv, and
# write two files into the current directory:
#   data_shuf   - the CSV minus its last line, in random order
#   data_train  - the leading part of data_shuf selected by `head -n "$1"`
#
# Usage: get_data_simple.sh TRAIN_LINES
#   TRAIN_LINES  line count handed to `head -n` when building data_train
#
# Exit status: 0 on success, 1 if any download/unpack/split step fails,
#              2 if TRAIN_LINES is missing.

# Make the `head | shuf` pipeline report a failure of either stage.
set -o pipefail

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Check the argument before doing any work, so a forgotten count is reported
# immediately instead of after the archive has been fetched and unpacked.
if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s TRAIN_LINES\n' "${0##*/}" >&2
  exit 2
fi

readonly DATASET="sgonkaggle/youtube-trend-with-subscriber"
readonly ARCHIVE="youtube-trend-with-subscriber.zip"
readonly CSV="USvideos_modified.csv"

kaggle datasets download -d "${DATASET}" \
  || die "error: could not download dataset ${DATASET}"

# -o overwrites already-extracted files without asking, so running the script
# a second time does not stop at an interactive "replace?" prompt.
unzip -o "${ARCHIVE}" \
  || die "error: could not unzip ${ARCHIVE}"

# Preview: first two lines of the CSV.
head -n 2 "${CSV}"

# Print every non-empty line. Only the CSV is given as an operand: adding `-`
# would make grep read standard input as well and block on a terminal.
grep -v -e "^$" "${CSV}"

# Line count of the raw CSV (wc prints "<count> <file>").
wc -l "${CSV}"

# NOTE(review): `head -n -1` removes the LAST line of the CSV, not the first.
# If the goal was to keep a header row out of the shuffled data, this should
# be `tail -n +2` instead - confirm the intent before changing it.
head -n -1 "${CSV}" | shuf > "data_shuf" \
  || die "error: could not create data_shuf"

head -n "$1" "data_shuf" > "data_train" \
  || die "error: could not create data_train (TRAIN_LINES='$1')"

echo "Shuffled dataset"
wc -l "data_shuf"
echo "Train dataset"
wc -l "data_train"