update split dataset names

This commit is contained in:
Szymon Parafiński 2022-04-03 23:55:35 +02:00
parent 5c5ab52044
commit 8ec51f1c61
3 changed files with 7 additions and 6 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
.idea .idea
*.csv

View File

@ -5,9 +5,9 @@ dataset_operation() {
len2=$(($len1/10)) len2=$(($len1/10))
len3=$(($len2*2)) len3=$(($len2*2))
len4=$(($len3+1)) len4=$(($len3+1))
head -n $len2 imdb_top_1000.csv.shuf > imdb_top_1000_test2.csv head -n $len2 imdb_top_1000.csv.shuf > imdb_top_1000_test.csv
head -n $len3 imdb_top_1000.csv.shuf | tail -n $len2 > imdb_top_1000_dev2.csv head -n $len3 imdb_top_1000.csv.shuf | tail -n $len2 > imdb_top_1000_dev.csv
tail -n +$len4 imdb_top_1000.csv.shuf > imdb_top_1000_train2.csv tail -n +$len4 imdb_top_1000.csv.shuf > imdb_top_1000_train.csv
rm imdb_top_1000.csv.shuf rm imdb_top_1000.csv.shuf
wc -l imdb_top_1000.csv.* wc -l imdb_top_1000.csv.*
} }

View File

@ -1,3 +1,3 @@
wc -l imdb_top_1000_test.csv > stats.txt wc -l data_test.csv > stats.txt
wc -l imdb_top_1000_dev.csv >> stats.txt wc -l data_train.csv >> stats.txt
wc -l imdb_top_1000_train.csv >> stats.txt wc -l data_dev.csv >> stats.txt