Update split dataset names
This commit is contained in:
parent
5c5ab52044
commit
8ec51f1c61
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
.idea
|
||||
*.csv
|
||||
|
@ -5,9 +5,9 @@ dataset_operation() {
|
||||
len2=$(($len1/10))
|
||||
len3=$(($len2*2))
|
||||
len4=$(($len3+1))
|
||||
head -n $len2 imdb_top_1000.csv.shuf > imdb_top_1000_test2.csv
|
||||
head -n $len3 imdb_top_1000.csv.shuf | tail -n $len2 > imdb_top_1000_dev2.csv
|
||||
tail -n +$len4 imdb_top_1000.csv.shuf > imdb_top_1000_train2.csv
|
||||
head -n $len2 imdb_top_1000.csv.shuf > imdb_top_1000_test.csv
|
||||
head -n $len3 imdb_top_1000.csv.shuf | tail -n $len2 > imdb_top_1000_dev.csv
|
||||
tail -n +$len4 imdb_top_1000.csv.shuf > imdb_top_1000_train.csv
|
||||
rm imdb_top_1000.csv.shuf
|
||||
wc -l imdb_top_1000.csv.*
|
||||
}
|
||||
|
@ -1,3 +1,3 @@
|
||||
wc -l imdb_top_1000_test.csv > stats.txt
|
||||
wc -l imdb_top_1000_dev.csv >> stats.txt
|
||||
wc -l imdb_top_1000_train.csv >> stats.txt
|
||||
wc -l data_test.csv > stats.txt
|
||||
wc -l data_train.csv >> stats.txt
|
||||
wc -l data_dev.csv >> stats.txt
|
Loading…
Reference in New Issue
Block a user