diff --git a/.gitignore b/.gitignore index 485dee6..da47d65 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ .idea +*.csv diff --git a/lab2/download.sh b/lab2/download.sh index 1f6ec76..8396634 100644 --- a/lab2/download.sh +++ b/lab2/download.sh @@ -5,9 +5,9 @@ dataset_operation() { len2=$(($len1/10)) len3=$(($len2*2)) len4=$(($len3+1)) - head -n $len2 imdb_top_1000.csv.shuf > imdb_top_1000_test2.csv - head -n $len3 imdb_top_1000.csv.shuf | tail -n $len2 > imdb_top_1000_dev2.csv - tail -n +$len4 imdb_top_1000.csv.shuf > imdb_top_1000_train2.csv + head -n $len2 imdb_top_1000.csv.shuf > imdb_top_1000_test.csv + head -n $len3 imdb_top_1000.csv.shuf | tail -n $len2 > imdb_top_1000_dev.csv + tail -n +$len4 imdb_top_1000.csv.shuf > imdb_top_1000_train.csv rm imdb_top_1000.csv.shuf wc -l imdb_top_1000.csv.* } diff --git a/lab2/stats.sh b/lab2/stats.sh index 1d87888..d316f7d 100644 --- a/lab2/stats.sh +++ b/lab2/stats.sh @@ -1,3 +1,3 @@ -wc -l imdb_top_1000_test.csv > stats.txt -wc -l imdb_top_1000_dev.csv >> stats.txt -wc -l imdb_top_1000_train.csv >> stats.txt \ No newline at end of file +wc -l data_test.csv > stats.txt +wc -l data_train.csv >> stats.txt +wc -l data_dev.csv >> stats.txt \ No newline at end of file