diff --git a/install.sh b/install.sh index ba28409..b433f8f 100644 --- a/install.sh +++ b/install.sh @@ -11,5 +11,4 @@ python -m spacy download pl_core_news_lg mkdir data && cd data wget https://minio.clarin-pl.eu/ermlab/public/PoLitBert/corpus-oscar/corpus_oscar_2020-04-10_64M_lines.zip unzip -p corpus_oscar_2020-04-10_64M_lines.zip | sed -r '/^\s*$/d' | gawk 'NF>6' > oscar_filtered.txt -split -l 1000000 --numeric-suffixes=1 --suffix-length=1 --additional-suffix=".txt" oscar_filtered.txt "" -screen -S generate_synt_data \ No newline at end of file +split -l 1000000 --numeric-suffixes=1 --suffix-length=1 --additional-suffix=".txt" oscar_filtered.txt "" \ No newline at end of file