# https://github.com/pytorch/fairseq
# https://fairseq.readthedocs.io/en/latest/
!pip install fairseq
!pip install fastBPE regex requests sacremoses subword_nmt
import torch, fairseq
# https://github.com/pytorch/fairseq/blob/master/examples/language_model/README.md
# (the sampling interface below is the one from the language-model example;
#  here it is used with a WMT19 en-de translation checkpoint)
en_lm = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de.single_model')
Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master
#en_lm.cuda()
en_lm.sample('Barack Obama', beam=1, sampling=True, sampling_topk=10, temperature=0.8)
'Barack Obama'
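# A small sketch (assumption: simply re-running the same call as above) showing how the
# temperature parameter affects top-k sampling; lower values give more conservative output.
for temp in (0.5, 0.8, 1.2):
    print(temp, en_lm.sample('Barack Obama', beam=1, sampling=True,
                             sampling_topk=10, temperature=temp))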
del en_lm
# https://github.com/pytorch/fairseq/tree/master/examples/roberta
roberta = torch.hub.load('pytorch/fairseq', 'roberta.base')
Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master
# roberta.cuda()
roberta.eval()
RobertaHubInterface(
  (model): RobertaModel(
    (encoder): RobertaEncoder(
      (sentence_encoder): TransformerSentenceEncoder(
        (dropout_module): FairseqDropout()
        (embed_tokens): Embedding(50265, 768, padding_idx=1)
        (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1)
        (emb_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (layers): ModuleList(
          (0-11): 12 x TransformerSentenceEncoderLayer(
            (dropout_module): FairseqDropout()
            (activation_dropout_module): FairseqDropout()
            (self_attn): MultiheadAttention(
              (dropout_module): FairseqDropout()
              (k_proj): Linear(in_features=768, out_features=768, bias=True)
              (v_proj): Linear(in_features=768, out_features=768, bias=True)
              (q_proj): Linear(in_features=768, out_features=768, bias=True)
              (out_proj): Linear(in_features=768, out_features=768, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (fc1): Linear(in_features=768, out_features=3072, bias=True)
            (fc2): Linear(in_features=3072, out_features=768, bias=True)
            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          )
        )
      )
      (lm_head): RobertaLMHead(
        (dense): Linear(in_features=768, out_features=768, bias=True)
        (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      )
    )
    (classification_heads): ModuleDict()
  )
)
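# Quick sanity check (sketch): the architecture printed above is roberta.base
# (12 layers, hidden size 768); counting parameters confirms the ~125M figure.
n_params = sum(p.numel() for p in roberta.model.parameters())
print(f'{n_params / 1e6:.1f}M parameters')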
tokens = roberta.encode('Hello world!')
last_layer_features = roberta.extract_features(tokens)
all_layers = roberta.extract_features(tokens, return_all_hiddens=True)
assert torch.all(all_layers[-1] == last_layer_features)
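# Sketch: last_layer_features has shape [1, num_tokens, 768]; mean-pooling over the
# token dimension is one simple (assumed, not fairseq-prescribed) way to get a
# fixed-size sentence embedding out of it.
print(last_layer_features.shape)            # torch.Size([1, T, 768])
sentence_embedding = last_layer_features.mean(dim=1)
print(sentence_embedding.shape)             # torch.Size([1, 768])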
roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master
# Encode a pair of sentences and make a prediction
tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is not very optimized.')
roberta.predict('mnli', tokens).argmax() # 0: contradiction
# Encode another pair of sentences
tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is based on BERT.')
roberta.predict('mnli', tokens).argmax() # 2: entailment
# MNLI label indices: 0 - contradiction, 1 - neutral, 2 - entailment
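# Sketch: mapping the argmax index back to a label name, using the index order listed
# above (the dict below is defined here only for illustration).
mnli_labels = {0: 'contradiction', 1: 'neutral', 2: 'entailment'}
pred = roberta.predict('mnli', tokens).argmax().item()
print(mnli_labels[pred])   # 'entailment' for the sentence pair encoded above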
roberta.fill_mask('The first Star wars movie came out in <mask>', topk=3)
roberta.fill_mask('Vikram samvat calender is official in <mask>', topk=3)
roberta.fill_mask('<mask> is the common currency of the European Union', topk=3)
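# fill_mask returns a list of (filled sentence, probability, predicted token) tuples;
# a sketch that prints the top candidates for one of the prompts above.
for filled, prob, token in roberta.fill_mask('The first Star wars movie came out in <mask>', topk=3):
    print(f'{prob:.3f} {token!r:>10} {filled}')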
del roberta
###### FAIRSEQ TRANSLATION https://pytorch.org/hub/pytorch_fairseq_translation/
en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt16.en-de',
tokenizer='moses', bpe='subword_nmt')
en2de.eval()
# en2de.cuda()
en2de.translate('Hello world!')
en2de.translate(['Hello world!', 'The cat sat on the mat.'])
en2de.translate(['Hello world!', 'The cat sat on the mat.'], beam=5)
en_toks = en2de.tokenize('Hello world!')
en_bpe = en2de.apply_bpe(en_toks)
en_bin = en2de.binarize(en_bpe)
de_bin = en2de.generate(en_bin, beam=5, sampling=True, sampling_topk=20)
de_sample = de_bin[0]['tokens']
de_bpe = en2de.string(de_sample)
de_toks = en2de.remove_bpe(de_bpe)
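# Sketch: the manual pipeline above wrapped into one helper (the helper name is made up),
# finishing with detokenize() to get plain text back, mirroring en2de.translate().
def sample_translation(model, sentence, topk=20):
    toks = model.tokenize(sentence)
    bpe = model.apply_bpe(toks)
    binarized = model.binarize(bpe)
    hypos = model.generate(binarized, beam=5, sampling=True, sampling_topk=topk)
    out_bpe = model.string(hypos[0]['tokens'])
    return model.detokenize(model.remove_bpe(out_bpe))

print(sample_translation(en2de, 'Hello world!'))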
# training: https://github.com/pytorch/fairseq/blob/master/examples/translation/README.md
del en2de