tau-2020-pytorch-tutorial/fairseq_introduction.ipynb

# https://github.com/pytorch/fairseq
# https://fairseq.readthedocs.io/en/latest/
!pip install fairseq
Requirement already satisfied: fairseq in /home/kuba/Syncthing/przedmioty/tau/fairseq (1.0.0a0+1164a7f)
Requirement already satisfied: cffi in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.14.0)
Requirement already satisfied: cython in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (0.29.21)
Requirement already satisfied: hydra-core<1.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.0.5)
Requirement already satisfied: omegaconf<2.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (2.0.5)
Requirement already satisfied: regex in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (2020.11.13)
Requirement already satisfied: sacrebleu>=1.4.12 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.4.14)
Requirement already satisfied: torch in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.7.0)
Requirement already satisfied: tqdm in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (4.54.1)
Requirement already satisfied: numpy in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.19.2)
Requirement already satisfied: pycparser in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from cffi->fairseq) (2.20)
Requirement already satisfied: importlib-resources; python_version < "3.9" in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from hydra-core<1.1->fairseq) (5.0.0)
Requirement already satisfied: antlr4-python3-runtime==4.8 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from hydra-core<1.1->fairseq) (4.8)
Requirement already satisfied: PyYAML>=5.1.* in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from omegaconf<2.1->fairseq) (5.4b2)
Requirement already satisfied: typing-extensions in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from omegaconf<2.1->fairseq) (3.7.4.3)
Requirement already satisfied: portalocker in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacrebleu>=1.4.12->fairseq) (2.0.0)
Requirement already satisfied: future in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from torch->fairseq) (0.18.2)
Requirement already satisfied: dataclasses in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from torch->fairseq) (0.6)
!pip install fastBPE regex requests sacremoses subword_nmt
Requirement already satisfied: fastBPE in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.1.0)
Requirement already satisfied: regex in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (2020.11.13)
Requirement already satisfied: requests in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (2.25.0)
Requirement already satisfied: sacremoses in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.0.43)
Requirement already satisfied: subword_nmt in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.3.7)
Requirement already satisfied: chardet<4,>=3.0.2 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (3.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (2020.12.5)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (1.25.11)
Requirement already satisfied: idna<3,>=2.5 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (2.10)
Requirement already satisfied: tqdm in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (4.54.1)
Requirement already satisfied: click in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (7.1.2)
Requirement already satisfied: six in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (1.15.0)
Requirement already satisfied: joblib in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (0.17.0)
import torch, fairseq
# https://github.com/pytorch/fairseq/blob/master/examples/language_model/README.md
# note: the checkpoint loaded below is a WMT19 en-de translation model rather than a language
# model, so sample() only round-trips the prompt; an LM sketch follows after this cell
en_lm = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de.single_model')
Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master
#en_lm.cuda()
en_lm.sample('Barack Obama', beam=1, sampling=True, sampling_topk=10, temperature=0.8)
'Barack Obama'
del en_lm
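# The cell above actually loaded a WMT19 en-de *translation* checkpoint, which is why sample()
# only echoed the prompt. For genuine left-to-right generation, the language_model README loads
# a dedicated LM checkpoint. A minimal sketch, assuming the 'transformer_lm.wmt19.en' hub entry
# plus the moses/fastBPE extras installed above:
en_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt19.en',
                       tokenizer='moses', bpe='fastbpe')
en_lm.eval()  # disable dropout
# continue the prompt with top-k sampling, same decoding settings as the cell above
en_lm.sample('Barack Obama', beam=1, sampling=True, sampling_topk=10, temperature=0.8)
del en_lm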
# https://github.com/pytorch/fairseq/tree/master/examples/roberta
roberta = torch.hub.load('pytorch/fairseq', 'roberta.base')
Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master
# roberta.cuda()
roberta.eval()
RobertaHubInterface(
  (model): RobertaModel(
    (encoder): RobertaEncoder(
      (sentence_encoder): TransformerSentenceEncoder(
        (dropout_module): FairseqDropout()
        (embed_tokens): Embedding(50265, 768, padding_idx=1)
        (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1)
        (emb_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (layers): ModuleList(
          (0): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (1): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (2): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (3): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (4): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (5): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (6): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (7): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (8): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (9): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (10): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
          (11): TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
        )
      )
      (lm_head): RobertaLMHead(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      )
    )
    (classification_heads): ModuleDict()
  )
)
tokens = roberta.encode('Hello world!')
last_layer_features = roberta.extract_features(tokens)
all_layers = roberta.extract_features(tokens, return_all_hiddens=True)
assert torch.all(all_layers[-1] == last_layer_features)
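# Quick sanity check on what encode()/extract_features() return (shapes as described in the
# RoBERTa README; the sequence length depends on the BPE segmentation of the input):
print(roberta.decode(tokens))        # round-trips back to 'Hello world!'
print(last_layer_features.shape)     # (batch=1, num_bpe_tokens, 768) hidden states
print(len(all_layers))               # embedding output + 12 transformer layers = 13 for roberta.base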
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master
# Encode a pair of sentences and make a prediction
tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is not very optimized.')
roberta.predict('mnli', tokens).argmax()  # 0: contradiction
# Encode another pair of sentences
tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is based on BERT.')
roberta.predict('mnli', tokens).argmax()  # 2: entailment
# MNLI label indices: 0 = contradiction, 1 = neutral, 2 = entailment
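# predict() returns log-probabilities over the three classes; a small (hypothetical) helper dict
# turns the argmax index into a readable label, using the index order listed above:
mnli_labels = {0: 'contradiction', 1: 'neutral', 2: 'entailment'}
logprobs = roberta.predict('mnli', tokens)
print(mnli_labels[logprobs.argmax().item()])  # 'entailment' for the pair encoded above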
roberta.fill_mask('The first Star wars movie came out in <mask>', topk=3)
roberta.fill_mask('Vikram samvat calender is official in <mask>', topk=3)
roberta.fill_mask('<mask> is the common currency of the European Union', topk=3)
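# fill_mask() returns the top-k completions as (filled_sentence, probability, predicted_token)
# tuples (structure as shown in the RoBERTa README), so they can be inspected like this:
for filled, prob, token in roberta.fill_mask('<mask> is the common currency of the European Union', topk=3):
    print(f'{prob:.3f}  {token!r}  ->  {filled}')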
del roberta
###### FAIRSEQ TRANSLATION https://pytorch.org/hub/pytorch_fairseq_translation/
en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt16.en-de',
                        tokenizer='moses', bpe='subword_nmt')
en2de.eval()
# en2de.cuda()
en2de.translate('Hello world!')
en2de.translate(['Hello world!', 'The cat sat on the mat.'])
en2de.translate(['Hello world!', 'The cat sat on the mat.'], beam=5)
en_toks = en2de.tokenize('Hello world!')
en_bpe = en2de.apply_bpe(en_toks)
en_bin = en2de.binarize(en_bpe)
de_bin = en2de.generate(en_bin, beam=5, sampling=True, sampling_topk=20)
de_sample = de_bin[0]['tokens']
de_bpe = en2de.string(de_sample)
de_toks = en2de.remove_bpe(de_bpe)
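# Per the translation hub tutorial linked above, the manual pipeline ends by detokenizing
# back to a plain German string:
de = en2de.detokenize(de_toks)
print(de)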
# training: https://github.com/pytorch/fairseq/blob/master/examples/translation/README.md
del en2de