# Bash snippet (listed by the source viewer as 10 lines, 361 B)
# Fetch the GPT-2 BPE dictionary published for fairseq, then run
# fairseq-preprocess over the BPE-encoded WikiText-103 splits.
#
# Inputs (must already exist):
#   wikitext-103-raw/wiki.train.bpe
#   wikitext-103-raw/wiki.valid.bpe
#   wikitext-103-raw/wiki.test.bpe
# Outputs:
#   gpt2_bpe/dict.txt       downloaded dictionary
#   data-bin/wikitext-103   destination directory passed to fairseq-preprocess
#
# The steps are chained with `&&` rather than `set -e` so the snippet is safe
# to paste into an interactive shell: a failure stops the chain without
# terminating the shell itself.

# wget -O does not create missing parent directories.
mkdir -p gpt2_bpe &&

# wget -O truncates the target file before downloading, so a failed transfer
# leaves an empty/partial dict.txt behind; the `&&` keeps fairseq-preprocess
# from running against such a file.
wget -O gpt2_bpe/dict.txt https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/dict.txt &&

# --only-source: presumably because the data is monolingual (no target side)
# -- confirm against the fairseq-preprocess docs.
# --srcdict: use the downloaded dictionary.
# --workers 10: number of parallel workers; adjust to the available cores.
fairseq-preprocess \
  --only-source \
  --srcdict gpt2_bpe/dict.txt \
  --trainpref wikitext-103-raw/wiki.train.bpe \
  --validpref wikitext-103-raw/wiki.valid.bpe \
  --testpref wikitext-103-raw/wiki.test.bpe \
  --destdir data-bin/wikitext-103 \
  --workers 10