Fix screen problem
This commit is contained in:
parent
8af8ddbde5
commit
e6775e57f7
@@ -3,7 +3,6 @@ apt update -y
|
|||||||
apt upgrade
|
apt upgrade
|
||||||
|
|
||||||
apt-get install python3 python3-pip python3-venv unzip gawk screen
|
apt-get install python3 python3-pip python3-venv unzip gawk screen
|
||||||
screen -S generate_data
|
|
||||||
python3 -m venv env
|
python3 -m venv env
|
||||||
source ./env/bin/activate
|
source ./env/bin/activate
|
||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
@@ -13,3 +12,4 @@ mkdir data && cd data
|
|||||||
wget https://minio.clarin-pl.eu/ermlab/public/PoLitBert/corpus-oscar/corpus_oscar_2020-04-10_64M_lines.zip
|
wget https://minio.clarin-pl.eu/ermlab/public/PoLitBert/corpus-oscar/corpus_oscar_2020-04-10_64M_lines.zip
|
||||||
unzip -p corpus_oscar_2020-04-10_64M_lines.zip | sed -r '/^\s*$/d' | gawk 'NF>6' > oscar_filtered.txt
|
unzip -p corpus_oscar_2020-04-10_64M_lines.zip | sed -r '/^\s*$/d' | gawk 'NF>6' > oscar_filtered.txt
|
||||||
split -l 1000000 --numeric-suffixes=1 --suffix-length=1 --additional-suffix=".txt" oscar_filtered.txt ""
|
split -l 1000000 --numeric-suffixes=1 --suffix-length=1 --additional-suffix=".txt" oscar_filtered.txt ""
|
||||||
|
screen -S generate_synt_data
|
Loading…
Reference in New Issue
Block a user