14 lines
257 B
Bash
14 lines
257 B
Bash
|
#!/usr/bin/env bash
#
# Binarize a source-only text corpus with fairseq-preprocess.
#
# Inputs  (relative to the current working directory):
#   data/train.txt, data/valid.txt, data/test.txt
# Output:
#   data-bin/lm-spm-bpe
#
# Requires `fairseq-preprocess` to be available on PATH.
# NOTE(review): the destination name suggests the inputs are already
# SentencePiece/BPE-tokenized text for language modeling -- confirm.

# Strict mode: abort on command failure (-e), on use of an unset
# variable (-u), and on a failure in any stage of a pipeline (pipefail).
set -euo pipefail
# Trace every command to stderr so the exact invocation shows up in logs.
set -x

# Directory that holds the raw train/valid/test text files.
readonly TEXT=data

# Expansions are quoted so the paths survive word-splitting and globbing
# should the directory name ever contain whitespace or glob characters.
fairseq-preprocess \
  --only-source --nwordssrc 50000 \
  --trainpref "${TEXT}/train.txt" \
  --validpref "${TEXT}/valid.txt" \
  --testpref "${TEXT}/test.txt" \
  --destdir data-bin/lm-spm-bpe \
  --workers 8
|