# Download the GPT-2 BPE encoder table and merge vocabulary into ./gpt2_bpe.
mkdir -p gpt2_bpe
wget -O gpt2_bpe/encoder.json https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/encoder.json
wget -O gpt2_bpe/vocab.bpe https://dl.fbaipublicfiles.com/fairseq/gpt2_bpe/vocab.bpe

# BPE-encode each dataset split: reads wikitext-103-raw/wiki.<split>.raw and
# writes wikitext-103-raw/wiki.<split>.bpe next to it.
# NOTE(review): --keep-empty presumably preserves blank input lines and
# --workers presumably sets the number of parallel worker processes; confirm
# against the multiprocessing_bpe_encoder module's argument help.
for SPLIT in train valid test; do
  python -m multiprocessing_bpe_encoder \
    --encoder-json gpt2_bpe/encoder.json \
    --vocab-bpe gpt2_bpe/vocab.bpe \
    --inputs "wikitext-103-raw/wiki.${SPLIT}.raw" \
    --outputs "wikitext-103-raw/wiki.${SPLIT}.bpe" \
    --keep-empty \
    --workers 10
done