add fairseq_introduction.ipynb

parent 0f409ae23b
commit b92722a314

fairseq_introduction.ipynb (new file, 722 lines)

@@ -0,0 +1,722 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://github.com/pytorch/fairseq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://fairseq.readthedocs.io/en/latest/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: fairseq in /home/kuba/Syncthing/przedmioty/tau/fairseq (1.0.0a0+1164a7f)\n",
      "Requirement already satisfied: cffi in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.14.0)\n",
      "Requirement already satisfied: cython in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (0.29.21)\n",
      "Requirement already satisfied: hydra-core<1.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.0.5)\n",
      "Requirement already satisfied: omegaconf<2.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (2.0.5)\n",
      "Requirement already satisfied: regex in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (2020.11.13)\n",
      "Requirement already satisfied: sacrebleu>=1.4.12 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.4.14)\n",
      "Requirement already satisfied: torch in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.7.0)\n",
      "Requirement already satisfied: tqdm in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (4.54.1)\n",
      "Requirement already satisfied: numpy in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.19.2)\n",
      "Requirement already satisfied: pycparser in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from cffi->fairseq) (2.20)\n",
      "Requirement already satisfied: importlib-resources; python_version < \"3.9\" in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from hydra-core<1.1->fairseq) (5.0.0)\n",
      "Requirement already satisfied: antlr4-python3-runtime==4.8 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from hydra-core<1.1->fairseq) (4.8)\n",
      "Requirement already satisfied: PyYAML>=5.1.* in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from omegaconf<2.1->fairseq) (5.4b2)\n",
      "Requirement already satisfied: typing-extensions in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from omegaconf<2.1->fairseq) (3.7.4.3)\n",
      "Requirement already satisfied: portalocker in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacrebleu>=1.4.12->fairseq) (2.0.0)\n",
      "Requirement already satisfied: future in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from torch->fairseq) (0.18.2)\n",
      "Requirement already satisfied: dataclasses in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from torch->fairseq) (0.6)\n"
     ]
    }
   ],
   "source": [
    "!pip install fairseq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: fastBPE in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.1.0)\n",
      "Requirement already satisfied: regex in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (2020.11.13)\n",
      "Requirement already satisfied: requests in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (2.25.0)\n",
      "Requirement already satisfied: sacremoses in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.0.43)\n",
      "Requirement already satisfied: subword_nmt in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.3.7)\n",
      "Requirement already satisfied: chardet<4,>=3.0.2 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (3.0.4)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (2020.12.5)\n",
      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (1.25.11)\n",
      "Requirement already satisfied: idna<3,>=2.5 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (2.10)\n",
      "Requirement already satisfied: tqdm in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (4.54.1)\n",
      "Requirement already satisfied: click in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (7.1.2)\n",
      "Requirement already satisfied: six in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (1.15.0)\n",
      "Requirement already satisfied: joblib in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (0.17.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install fastBPE regex requests sacremoses subword_nmt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch, fairseq"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://github.com/pytorch/fairseq/blob/master/examples/language_model/README.md"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master\n"
     ]
    }
   ],
   "source": [
    "# English language model from the language_model README cited above\n",
    "en_lm = torch.hub.load('pytorch/fairseq', 'transformer_lm.wmt19.en',\n",
    "                       tokenizer='moses', bpe='fastbpe')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# en_lm.cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Barack Obama'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "en_lm.sample('Barack Obama', beam=1, sampling=True, sampling_topk=10, temperature=0.8)"
   ]
  },
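  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Besides sampling, the hub interface can score text. A minimal sketch following the scoring example in the language_model README: `score` returns per-token log-probabilities under `'positional_scores'`, so averaging, negating and exponentiating them gives the sentence perplexity."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# mean per-token log-probability -> perplexity of the sentence\n",
    "en_lm.score('Barack Obama is coming to Sydney and New Zealand')['positional_scores'].mean().neg().exp()"
   ]
  },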
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "del en_lm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# https://github.com/pytorch/fairseq/tree/master/examples/roberta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master\n"
     ]
    }
   ],
   "source": [
    "roberta = torch.hub.load('pytorch/fairseq', 'roberta.base')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# roberta.cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RobertaHubInterface(\n",
       "  (model): RobertaModel(\n",
       "    (encoder): RobertaEncoder(\n",
       "      (sentence_encoder): TransformerSentenceEncoder(\n",
       "        (dropout_module): FairseqDropout()\n",
       "        (embed_tokens): Embedding(50265, 768, padding_idx=1)\n",
       "        (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1)\n",
       "        (emb_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "        (layers): ModuleList(\n",
       "          (0): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (1): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (2): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (3): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (4): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (5): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (6): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (7): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (8): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (9): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (10): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "          (11): TransformerSentenceEncoderLayer(\n",
       "            (dropout_module): FairseqDropout()\n",
       "            (activation_dropout_module): FairseqDropout()\n",
       "            (self_attn): MultiheadAttention(\n",
       "              (dropout_module): FairseqDropout()\n",
       "              (k_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (v_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (q_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "              (out_proj): Linear(in_features=768, out_features=768, bias=True)\n",
       "            )\n",
       "            (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "            (fc1): Linear(in_features=768, out_features=3072, bias=True)\n",
       "            (fc2): Linear(in_features=3072, out_features=768, bias=True)\n",
       "            (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "          )\n",
       "        )\n",
       "      )\n",
       "      (lm_head): RobertaLMHead(\n",
       "        (dense): Linear(in_features=768, out_features=768, bias=True)\n",
       "        (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n",
       "      )\n",
       "    )\n",
       "    (classification_heads): ModuleDict()\n",
       "  )\n",
       ")"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "roberta.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "tokens = roberta.encode('Hello world!')"
   ]
  },
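  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`encode` applies GPT-2 BPE and adds the special `<s>`/`</s>` tokens; `decode` should invert it exactly. A quick round-trip check:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roberta.decode(tokens)  # expected: 'Hello world!'"
   ]
  },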
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "last_layer_features = roberta.extract_features(tokens)"
   ]
  },
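  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The features come back batch-first as `(batch, tokens, hidden)`. For `roberta.base` the hidden size is 768, and 'Hello world!' encodes to 5 tokens including `<s>` and `</s>`, so the shape below should be `[1, 5, 768]` (matching the roberta README)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "last_layer_features.shape  # expected: torch.Size([1, 5, 768])"
   ]
  },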
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_layers = roberta.extract_features(tokens, return_all_hiddens=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "assert torch.all(all_layers[-1] == last_layer_features)"
   ]
  },
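  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With `return_all_hiddens=True` the list holds one tensor per hidden state: the embedding output plus one per transformer layer. A sanity check, assuming the 12-layer `roberta.base` architecture printed by `roberta.eval()` above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(all_layers)  # embeddings + 12 layers = 13 for roberta.base"
   ]
  },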
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master\n"
     ]
    }
   ],
   "source": [
    "roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode a pair of sentences and make a prediction\n",
    "tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is not very optimized.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roberta.predict('mnli', tokens).argmax()  # 0: contradiction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Encode another pair of sentences\n",
    "tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is based on BERT.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roberta.predict('mnli', tokens).argmax()  # 2: entailment"
   ]
  },
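  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A small sketch: `predict` returns log-probabilities over the three MNLI classes by default, so exponentiating shows the full distribution instead of just the argmax."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# probabilities for [contradiction, neutral, entailment]\n",
    "roberta.predict('mnli', tokens).exp()"
   ]
  },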
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MNLI label indices:\n",
    "# 0: contradiction\n",
    "# 1: neutral\n",
    "# 2: entailment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roberta.fill_mask('The first Star Wars movie came out in <mask>', topk=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roberta.fill_mask('Vikram Samvat calendar is official in <mask>', topk=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "roberta.fill_mask('<mask> is the common currency of the European Union', topk=3)"
   ]
  },
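  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A sketch for pretty-printing the completions, assuming the `(sentence, probability, token)` tuple layout shown in the roberta README's example output:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# assumes fill_mask returns (sentence, probability, token) tuples\n",
    "for sentence, prob, token in roberta.fill_mask('<mask> is the common currency of the European Union', topk=3):\n",
    "    print(f'{prob:.3f}  {token!r}  ->  {sentence}')"
   ]
  },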
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "del roberta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "###### FAIRSEQ TRANSLATION https://pytorch.org/hub/pytorch_fairseq_translation/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt16.en-de',\n",
    "                       tokenizer='moses', bpe='subword_nmt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en2de.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# en2de.cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en2de.translate('Hello world!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en2de.translate(['Hello world!', 'The cat sat on the mat.'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en2de.translate(['Hello world!', 'The cat sat on the mat.'], beam=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en_toks = en2de.tokenize('Hello world!')  # Moses tokenization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en_bpe = en2de.apply_bpe(en_toks)  # split tokens into subword units"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "en_bin = en2de.binarize(en_bpe)  # map subwords to vocabulary indices"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "de_bin = en2de.generate(en_bin, beam=5, sampling=True, sampling_topk=20)  # list of hypotheses"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "de_sample = de_bin[0]['tokens']  # token indices of the top hypothesis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "de_bpe = en2de.string(de_sample)  # indices back to a BPE string"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "de_toks = en2de.remove_bpe(de_bpe)  # merge subwords back into tokens"
   ]
  },
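  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The manual pipeline above stops at BPE removal, so `de_toks` is still Moses-tokenized. Detokenizing closes the round trip back to a plain string, following the fairseq translation hub tutorial this section mirrors:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "de = en2de.detokenize(de_toks)\n",
    "de"
   ]
  },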
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# training: https://github.com/pytorch/fairseq/blob/master/examples/translation/README.md"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "del en2de"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}