#!/usr/bin/env bash
#
# Binarize the classifier dataset with fairseq-preprocess.
#
# Two independent passes are run, each with --only-source (no target side):
#   1. input text -> ${DEST}/input0
#   2. labels     -> ${DEST}/label
#
# Usage: run from the directory that contains the `data/` folder.
# Requires: `fairseq-preprocess` on PATH.
#
# NOTE(review): the input0/label layout looks like the one used by fairseq's
# sentence-classification recipes -- confirm against the training command.

# Abort on command failure, on use of an unset variable, and on a failure in
# any pipeline stage.
set -euo pipefail
# Trace every command as it is executed so the exact invocation is logged.
set -x

# Root directory of the raw text / label files (no trailing slash, so the
# joined paths below contain a single separator).
readonly TEXT=data
# Root directory for the binarized output; each pass writes its own subfolder.
readonly DEST=data-bin/classifier-spm-bpe
# Worker count passed to each fairseq-preprocess run via --workers.
readonly WORKERS=8

# Pass 1: input side. --nwordssrc 50000 caps the source dictionary size.
# NOTE(review): the destination name suggests the text is already SPM/BPE
# tokenized before this step -- confirm with the upstream pipeline.
fairseq-preprocess \
  --only-source --nwordssrc 50000 \
  --trainpref "${TEXT}/train.txt" \
  --validpref "${TEXT}/valid.txt" \
  --destdir "${DEST}/input0" \
  --workers "${WORKERS}"

# Pass 2: label side. No dictionary size limit is given here.
# NOTE(review): labels are read from train/expected.tsv and dev-0/expected.tsv
# while the inputs above come from train.txt / valid.txt directly under
# ${TEXT}; verify that both passes refer to the same split, line for line.
fairseq-preprocess \
  --only-source \
  --trainpref "${TEXT}/train/expected.tsv" \
  --validpref "${TEXT}/dev-0/expected.tsv" \
  --destdir "${DEST}/label" \
  --workers "${WORKERS}"