Fix screen problem

This commit is contained in:
Wojciech Jarmosz 2022-04-24 21:23:32 +02:00
parent 8af8ddbde5
commit e6775e57f7

View File

@@ -3,7 +3,6 @@ apt update -y
apt upgrade
apt-get install python3 python3-pip python3-venv unzip gawk screen
-screen -S generate_data
python3 -m venv env
source ./env/bin/activate
pip install -r requirements.txt
@@ -12,4 +11,5 @@ python -m spacy download pl_core_news_lg
mkdir data && cd data
wget https://minio.clarin-pl.eu/ermlab/public/PoLitBert/corpus-oscar/corpus_oscar_2020-04-10_64M_lines.zip
unzip -p corpus_oscar_2020-04-10_64M_lines.zip | sed -r '/^\s*$/d' | gawk 'NF>6' > oscar_filtered.txt
-split -l 1000000 --numeric-suffixes=1 --suffix-length=1 --additional-suffix=".txt" oscar_filtered.txt ""
+split -l 1000000 --numeric-suffixes=1 --suffix-length=1 --additional-suffix=".txt" oscar_filtered.txt ""
+screen -S generate_synt_data