diff --git a/fairseq_introduction.ipynb b/fairseq_introduction.ipynb new file mode 100644 index 0000000..6e33260 --- /dev/null +++ b/fairseq_introduction.ipynb @@ -0,0 +1,722 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/pytorch/fairseq" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# https://fairseq.readthedocs.io/en/latest/" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: fairseq in /home/kuba/Syncthing/przedmioty/tau/fairseq (1.0.0a0+1164a7f)\n", + "Requirement already satisfied: cffi in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.14.0)\n", + "Requirement already satisfied: cython in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (0.29.21)\n", + "Requirement already satisfied: hydra-core<1.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.0.5)\n", + "Requirement already satisfied: omegaconf<2.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (2.0.5)\n", + "Requirement already satisfied: regex in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (2020.11.13)\n", + "Requirement already satisfied: sacrebleu>=1.4.12 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.4.14)\n", + "Requirement already satisfied: torch in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.7.0)\n", + "Requirement already satisfied: tqdm in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (4.54.1)\n", + "Requirement already satisfied: numpy in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from fairseq) (1.19.2)\n", + "Requirement already satisfied: pycparser in 
/home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from cffi->fairseq) (2.20)\n", + "Requirement already satisfied: importlib-resources; python_version < \"3.9\" in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from hydra-core<1.1->fairseq) (5.0.0)\n", + "Requirement already satisfied: antlr4-python3-runtime==4.8 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from hydra-core<1.1->fairseq) (4.8)\n", + "Requirement already satisfied: PyYAML>=5.1.* in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from omegaconf<2.1->fairseq) (5.4b2)\n", + "Requirement already satisfied: typing-extensions in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from omegaconf<2.1->fairseq) (3.7.4.3)\n", + "Requirement already satisfied: portalocker in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacrebleu>=1.4.12->fairseq) (2.0.0)\n", + "Requirement already satisfied: future in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from torch->fairseq) (0.18.2)\n", + "Requirement already satisfied: dataclasses in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from torch->fairseq) (0.6)\n" + ] + } + ], + "source": [ + "!pip install fairseq" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: fastBPE in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.1.0)\n", + "Requirement already satisfied: regex in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (2020.11.13)\n", + "Requirement already satisfied: requests in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (2.25.0)\n", + "Requirement already satisfied: sacremoses in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.0.43)\n", + "Requirement already satisfied: subword_nmt in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (0.3.7)\n", + "Requirement already 
satisfied: chardet<4,>=3.0.2 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (3.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (2020.12.5)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (1.25.11)\n", + "Requirement already satisfied: idna<3,>=2.5 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests) (2.10)\n", + "Requirement already satisfied: tqdm in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (4.54.1)\n", + "Requirement already satisfied: click in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (7.1.2)\n", + "Requirement already satisfied: six in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (1.15.0)\n", + "Requirement already satisfied: joblib in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from sacremoses) (0.17.0)\n" + ] + } + ], + "source": [ + "!pip install fastBPE regex requests sacremoses subword_nmt" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import torch, fairseq" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/pytorch/fairseq/blob/master/examples/language_model/README.md" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master\n" + ] + } + ], + "source": [ + "en_lm = torch.hub.load('pytorch/fairseq', 'transformer.wmt19.en-de.single_model')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "#en_lm.cuda()" + ] + }, + { + "cell_type": 
"code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Barack Obama'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "en_lm.sample('Barack Obama', beam=1, sampling=True, sampling_topk=10, temperature=0.8)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "del en_lm" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/pytorch/fairseq/tree/master/examples/roberta" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master\n" + ] + } + ], + "source": [ + "roberta = torch.hub.load('pytorch/fairseq', 'roberta.base')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# roberta.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RobertaHubInterface(\n", + " (model): RobertaModel(\n", + " (encoder): RobertaEncoder(\n", + " (sentence_encoder): TransformerSentenceEncoder(\n", + " (dropout_module): FairseqDropout()\n", + " (embed_tokens): Embedding(50265, 768, padding_idx=1)\n", + " (embed_positions): LearnedPositionalEmbedding(514, 768, padding_idx=1)\n", + " (emb_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (layers): ModuleList(\n", + " (0): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, 
out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (1): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (2): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, 
out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (3): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (4): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (5): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): 
FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (6): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (7): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " 
(out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (8): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (9): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " 
(final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (10): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " (11): TransformerSentenceEncoderLayer(\n", + " (dropout_module): FairseqDropout()\n", + " (activation_dropout_module): FairseqDropout()\n", + " (self_attn): MultiheadAttention(\n", + " (dropout_module): FairseqDropout()\n", + " (k_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (v_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (q_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " (out_proj): Linear(in_features=768, out_features=768, bias=True)\n", + " )\n", + " (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " (fc1): Linear(in_features=768, out_features=3072, bias=True)\n", + " (fc2): Linear(in_features=3072, out_features=768, bias=True)\n", + " (final_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " )\n", + " )\n", + " (lm_head): RobertaLMHead(\n", + " (dense): Linear(in_features=768, out_features=768, bias=True)\n", + " (layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)\n", + " )\n", + " )\n", + " 
(classification_heads): ModuleDict()\n", + " )\n", + ")" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "roberta.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "tokens = roberta.encode('Hello world!')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "last_layer_features = roberta.extract_features(tokens)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "all_layers = roberta.extract_features(tokens, return_all_hiddens=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "assert torch.all(all_layers[-1] == last_layer_features)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using cache found in /home/kuba/.cache/torch/hub/pytorch_fairseq_master\n" + ] + } + ], + "source": [ + "roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Encode a pair of sentences and make a prediction\n", + "tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is not very optimized.')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "roberta.predict('mnli', tokens).argmax() # 0: contradiction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Encode another pair of sentences\n", + "tokens = roberta.encode('Roberta is a heavily optimized version of BERT.', 'Roberta is based on BERT.')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [ + "roberta.predict('mnli', tokens).argmax() # 2: entailment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# contradiction - sprzeczność\n", + "# neutral\n", + "# entailment - wynikanie\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "roberta.fill_mask('The first Star wars movie came out in <mask>', topk=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "roberta.fill_mask('Vikram samvat calender is official in <mask>', topk=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "roberta.fill_mask('<mask> is the common currency of the European Union', topk=3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "del roberta" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "###### FAIRSEQ TRANSLATION https://pytorch.org/hub/pytorch_fairseq_translation/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "en2de = torch.hub.load('pytorch/fairseq', 'transformer.wmt16.en-de',\n", + " tokenizer='moses', bpe='subword_nmt')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "en2de.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# en2de.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "en2de.translate('Hello world!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "en2de.translate(['Hello world!', 'The cat sat on the 
mat.'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "en2de.translate(['Hello world!', 'The cat sat on the mat.'], beam=5)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "en_toks = en2de.tokenize('Hello world!')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "en_bpe = en2de.apply_bpe(en_toks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "en_bin = en2de.binarize(en_bpe)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "de_bin = en2de.generate(en_bin, beam=5, sampling=True, sampling_topk=20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "de_sample = de_bin[0]['tokens']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "de_bpe = en2de.string(de_sample)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "de_toks = en2de.remove_bpe(de_bpe)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# trenowanie https://github.com/pytorch/fairseq/blob/master/examples/translation/README.md" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "del en2de" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}