ium_444018/download.sh

17 lines
656 B
Bash
Raw Normal View History

2022-03-27 17:43:59 +02:00
#######################################
# Shuffle the data rows of all_games.csv (header row dropped), keep at most
# CUTOFF of them, and split the sample into three files:
#   all_games.csv.test   first 10% of the sample (integer division)
#   all_games.csv.dev    next 10% of the sample
#   all_games.csv.train  everything after the first 20%
# Globals:
#   CUTOFF (read, optional) - number of shuffled rows to keep; when unset or
#                             empty, every data row is kept.
# Arguments:
#   None. Operates on files in the current working directory.
# Outputs:
#   Writes the three split files and prints their line counts to stdout.
# Returns:
#   0 on success, non-zero if the input is unreadable or a step fails.
#######################################
dataset_operation() {
  local src='all_games.csv'
  local shuffled="${src}.s"
  local sample="${src}.shuf"
  local total tenth dev_end train_start

  if [[ ! -r "${src}" ]]; then
    printf 'dataset_operation: cannot read %s\n' "${src}" >&2
    return 1
  fi

  # Line 1 is the CSV header; only the data rows are shuffled.
  tail -n +2 -- "${src}" | shuf > "${shuffled}" || return

  if [[ -n "${CUTOFF:-}" ]]; then
    head -n "${CUTOFF}" -- "${shuffled}" > "${sample}" || return
  else
    # Without a cutoff, `head -n` would get the filename as its count and
    # fail, leaving every split empty; fall back to the whole shuffled set.
    cp -- "${shuffled}" "${sample}" || return
  fi

  total=$(wc -l < "${sample}") || return
  tenth=$((total / 10))
  dev_end=$((tenth * 2))
  train_start=$((dev_end + 1))

  head -n "${tenth}" -- "${sample}" > "${src}.test" || return
  head -n "${dev_end}" -- "${sample}" | tail -n "${tenth}" > "${src}.dev" || return
  tail -n "+${train_start}" -- "${sample}" > "${src}.train" || return

  # Remove both intermediates so the report below lists only the splits.
  rm -f -- "${sample}" "${shuffled}"
  wc -l "${src}".*
}
2022-03-27 22:49:35 +02:00
# Download the Kaggle archive, unpack it, then build the test/dev/train splits.
# Each step needs the previous one's output, so stop at the first failure
# instead of running the later steps against missing files.
dataset_slug='harshitshankhdhar/imdb-dataset-of-top-1000-movies-and-tv-shows'
kaggle datasets download -d "${dataset_slug}" || exit
# The archive is named after the last path component of the dataset slug.
unzip "${dataset_slug##*/}.zip" || exit
# NOTE(review): dataset_operation reads all_games.csv from the current
# directory; confirm the unpacked archive provides a file with that name.
dataset_operation