DL_SEQ2SEQ/seq2seq-torch-successful.ipynb
2024-06-03 06:53:52 +02:00

1156 lines
121 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"source": [
"### Importy"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"collapsed": true,
"ExecuteTime": {
"start_time": "2024-06-02T19:58:41.249607Z",
"end_time": "2024-06-02T19:58:41.261609Z"
}
},
"outputs": [],
"source": [
"from __future__ import unicode_literals, print_function, division\n",
"from io import open\n",
"import unicodedata\n",
"import re\n",
"import os\n",
"import random\n",
"import torch\n",
"import pandas as pd\n",
"import torch.nn as nn\n",
"from torch import optim\n",
"import torch.nn.functional as F\n",
"from torchtext.data.metrics import bleu_score\n",
"\n",
"from torch.utils.data import TensorDataset, DataLoader, RandomSampler\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
]
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Is CUDA supported by this system? True\n",
"CUDA version: 12.1\n",
"ID of current CUDA device: 0\n",
"Name of current CUDA device: NVIDIA GeForce GTX 1660 Ti\n"
]
}
],
"source": [
"# Report the CUDA setup. The per-device queries raise on a machine without a\n",
"# usable GPU, so they are only issued when CUDA is actually available.\n",
"print(f'Is CUDA supported by this system? {torch.cuda.is_available()}')\n",
"print(f\"CUDA version: {torch.version.cuda}\")\n",
"\n",
"if torch.cuda.is_available():\n",
"    cuda_id = torch.cuda.current_device()\n",
"    print(f'ID of current CUDA device: {cuda_id}')\n",
"    print(f'Name of current CUDA device: {torch.cuda.get_device_name(cuda_id)}')\n",
"else:\n",
"    cuda_id = None  # keep the name defined on CPU-only machines\n",
"    print('No CUDA device available; running on the CPU.')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:20:55.709021Z",
"end_time": "2024-06-02T19:20:55.725023Z"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cuda\n"
]
}
],
"source": [
"# Run on the GPU when CUDA is available and on the CPU otherwise.\n",
"device = torch.device(\"cpu\" if not torch.cuda.is_available() else \"cuda\")\n",
"print(device)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:20:55.996605Z",
"end_time": "2024-06-02T19:20:56.041138Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Konwersja słów na tensory"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [],
"source": [
"SOS_token = 0  # index reserved for the start-of-sentence marker\n",
"EOS_token = 1  # index reserved for the end-of-sentence marker\n",
"\n",
"class Lang:\n",
"    \"\"\"Vocabulary of one language.\n",
"\n",
"    Holds a word -> index map, an index -> word map and per-word occurrence\n",
"    counts. Indices 0 and 1 are pre-assigned to \"SOS\" and \"EOS\".\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, name):\n",
"        self.name = name\n",
"        self.word2index = {}\n",
"        self.word2count = {}\n",
"        self.index2word = {SOS_token: \"SOS\", EOS_token: \"EOS\"}\n",
"        self.n_words = len(self.index2word)  # SOS and EOS are already counted\n",
"\n",
"    def addSentence(self, sentence):\n",
"        \"\"\"Register every space-separated word of `sentence`.\"\"\"\n",
"        tokens = sentence.split(' ')\n",
"        for token in tokens:\n",
"            self.addWord(token)\n",
"\n",
"    def addWord(self, word):\n",
"        \"\"\"Count `word`, assigning it the next free index on first sight.\"\"\"\n",
"        if word in self.word2index:\n",
"            self.word2count[word] += 1\n",
"            return\n",
"        new_index = self.n_words\n",
"        self.word2index[word] = new_index\n",
"        self.word2count[word] = 1\n",
"        self.index2word[new_index] = word\n",
"        self.n_words = new_index + 1"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:20:59.879666Z",
"end_time": "2024-06-02T19:20:59.893667Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Przygotowanie danych"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [],
"source": [
"# Turn a Unicode string to plain ASCII, thanks to\n",
"# https://stackoverflow.com/a/518232/2809427\n",
"def unicodeToAscii(s):\n",
"    \"\"\"Return `s` with its diacritics removed.\n",
"\n",
"    The string is NFD-normalized and every combining mark (category 'Mn') is\n",
"    dropped. The Polish letters 'ł'/'Ł' have no canonical decomposition, so\n",
"    NFD leaves them untouched; they are mapped to 'l'/'L' explicitly.\n",
"    Otherwise the letter filter in normalizeString() would replace them with\n",
"    a space and split the word in two.\n",
"    \"\"\"\n",
"    stripped = ''.join(\n",
"        c for c in unicodedata.normalize('NFD', s)\n",
"        if unicodedata.category(c) != 'Mn'\n",
"    )\n",
"    return stripped.replace('ł', 'l').replace('Ł', 'L')\n",
"\n",
"# Lowercase, trim, and remove non-letter characters\n",
"def normalizeString(s):\n",
"    \"\"\"Lowercase and trim `s`, strip diacritics and keep only letters, '!' and '?'.\n",
"\n",
"    Returns the cleaned string with words separated by single spaces and\n",
"    '!' / '?' split off as separate tokens.\n",
"    \"\"\"\n",
"    s = unicodeToAscii(s.lower().strip())\n",
"    # Put a space in front of sentence punctuation so it becomes its own token.\n",
"    s = re.sub(r\"([.!?])\", r\" \\1\", s)\n",
"    # Collapse every run of other characters (including '.') into one space.\n",
"    s = re.sub(r\"[^a-zA-Z!?]+\", r\" \", s)\n",
"    return s.strip()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:21:00.877093Z",
"end_time": "2024-06-02T19:21:00.892090Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Wczytanie danych"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 13,
"outputs": [],
"source": [
"def readLangs(lang1, lang2, reverse=False):\n",
"    \"\"\"Read sentence pairs from 'data/<lang1>-<lang2>.txt'.\n",
"\n",
"    Each line is split on tabs, its last field is dropped and the remaining\n",
"    fields are passed through normalizeString().\n",
"\n",
"    Args:\n",
"        lang1: name of the language in the first column of the file.\n",
"        lang2: name of the language in the second column of the file.\n",
"        reverse: when true, each pair is reversed and lang2 becomes the input\n",
"            language.\n",
"\n",
"    Returns:\n",
"        (input_lang, output_lang, pairs) where both languages are empty Lang\n",
"        instances and pairs is a list of lists of normalized sentences.\n",
"    \"\"\"\n",
"    print(\"Reading lines...\")\n",
"    # Read the file and split into lines; the context manager closes the handle.\n",
"    with open('data/%s-%s.txt' % (lang1, lang2), encoding='utf-8') as data_file:\n",
"        lines = data_file.read().strip().split('\\n')\n",
"\n",
"    # Split every line into pairs and normalize\n",
"    pairs = [[normalizeString(s) for s in l.split('\\t')[:-1]] for l in lines]\n",
"\n",
"    # Reverse pairs, make Lang instances\n",
"    if reverse:\n",
"        # `reversed` yields an iterator; materialize it so pairs stay indexable.\n",
"        pairs = [list(reversed(p)) for p in pairs]\n",
"        input_lang = Lang(lang2)\n",
"        output_lang = Lang(lang1)\n",
"    else:\n",
"        input_lang = Lang(lang1)\n",
"        output_lang = Lang(lang2)\n",
"\n",
"    return input_lang, output_lang, pairs"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:21:02.075474Z",
"end_time": "2024-06-02T19:21:02.087474Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Filtracja danych"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "markdown",
"source": [
"Pozostawiamy tylko pary, w których oba zdania mają mniej niż 10 słów, a zdanie angielskie zaczyna się od jednego z wybranych prefiksów (np. „i am”, „he is”)."
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 14,
"outputs": [],
"source": [
"MAX_LENGTH = 10\n",
"\n",
"# Sentence openings (after normalization) that a kept pair must start with.\n",
"eng_prefixes = (\n",
"    \"i am \",\n",
"    \"i m \",\n",
"    \"he is\",\n",
"    \"he s \",\n",
"    \"she is\",\n",
"    \"she s \",\n",
"    \"you are\",\n",
"    \"you re \",\n",
"    \"we are\",\n",
"    \"we re \",\n",
"    \"they are\",\n",
"    \"they re \",\n",
")\n",
"\n",
"def filterPair(p):\n",
"    \"\"\"Tell whether pair `p` is short enough and starts with an allowed prefix.\n",
"\n",
"    Both p[0] and p[1] must have fewer than MAX_LENGTH space-separated words,\n",
"    and p[1] must start with one of eng_prefixes.\n",
"    \"\"\"\n",
"    if len(p[0].split(' ')) >= MAX_LENGTH:\n",
"        return False\n",
"    if len(p[1].split(' ')) >= MAX_LENGTH:\n",
"        return False\n",
"    return p[1].startswith(eng_prefixes)\n",
"\n",
"\n",
"def filterPairs(pairs):\n",
"    \"\"\"Return the pairs accepted by filterPair(), in their original order.\"\"\"\n",
"    return list(filter(filterPair, pairs))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:21:03.811303Z",
"end_time": "2024-06-02T19:21:03.829054Z"
}
}
},
{
"cell_type": "code",
"execution_count": 15,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reading lines...\n",
"Read 49943 sentence pairs\n",
"Trimmed to 3613 sentence pairs\n",
"Counting words...\n",
"Counted words:\n",
"pol 3070\n",
"eng 1969\n",
"['nie umieram', 'i m not dying']\n"
]
}
],
"source": [
"def prepareData(lang1, lang2, reverse=False):\n",
"    \"\"\"Load, filter and index the sentence pairs of a language pair.\n",
"\n",
"    Reads the pairs with readLangs(), keeps those accepted by filterPairs()\n",
"    and registers the words of every kept pair in the two vocabularies.\n",
"\n",
"    Returns:\n",
"        (input_lang, output_lang, pairs) with the filled vocabularies and the\n",
"        filtered pairs.\n",
"    \"\"\"\n",
"    input_lang, output_lang, all_pairs = readLangs(lang1, lang2, reverse)\n",
"    print(\"Read %s sentence pairs\" % len(all_pairs))\n",
"\n",
"    pairs = filterPairs(all_pairs)\n",
"    print(\"Trimmed to %s sentence pairs\" % len(pairs))\n",
"\n",
"    print(\"Counting words...\")\n",
"    for pair in pairs:\n",
"        input_lang.addSentence(pair[0])\n",
"        output_lang.addSentence(pair[1])\n",
"\n",
"    print(\"Counted words:\")\n",
"    for vocabulary in (input_lang, output_lang):\n",
"        print(vocabulary.name, vocabulary.n_words)\n",
"    return input_lang, output_lang, pairs\n",
"\n",
"# Build the Polish -> English data set and show one random example pair.\n",
"input_lang, output_lang, pairs = prepareData('eng', 'pol' , True)\n",
"print(random.choice(pairs))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:21:04.527025Z",
"end_time": "2024-06-02T19:21:06.394023Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Model"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 16,
"outputs": [],
"source": [
"class EncoderRNN(nn.Module):\n",
"    \"\"\"Encoder: token embedding, dropout, then a batch-first GRU.\"\"\"\n",
"\n",
"    def __init__(self, input_size, hidden_size, dropout_p=0.1):\n",
"        super().__init__()\n",
"        self.hidden_size = hidden_size\n",
"\n",
"        self.embedding = nn.Embedding(input_size, hidden_size)\n",
"        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)\n",
"        self.dropout = nn.Dropout(dropout_p)\n",
"\n",
"    def forward(self, input):\n",
"        \"\"\"Encode a batch of token indices; returns the GRU (output, hidden) pair.\"\"\"\n",
"        embedded = self.embedding(input)\n",
"        return self.gru(self.dropout(embedded))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:21:11.058623Z",
"end_time": "2024-06-02T19:21:11.074974Z"
}
}
},
{
"cell_type": "code",
"execution_count": 17,
"outputs": [],
"source": [
"class DecoderRNN(nn.Module):\n",
"    \"\"\"Plain GRU decoder (no attention) unrolled for MAX_LENGTH steps.\"\"\"\n",
"\n",
"    def __init__(self, hidden_size, output_size):\n",
"        super().__init__()\n",
"        self.embedding = nn.Embedding(output_size, hidden_size)\n",
"        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)\n",
"        self.out = nn.Linear(hidden_size, output_size)\n",
"\n",
"    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):\n",
"        \"\"\"Decode MAX_LENGTH steps starting from SOS_token.\n",
"\n",
"        When `target_tensor` is given, its column i is fed as the input of step\n",
"        i + 1 (teacher forcing); otherwise the decoder feeds back its own top-1\n",
"        prediction.\n",
"\n",
"        Returns:\n",
"            (log-probabilities concatenated along dim 1, final hidden state, None).\n",
"        \"\"\"\n",
"        batch_size = encoder_outputs.size(0)\n",
"        decoder_input = torch.full((batch_size, 1), SOS_token, dtype=torch.long, device=device)\n",
"        decoder_hidden = encoder_hidden\n",
"        step_outputs = []\n",
"        teacher_forcing = target_tensor is not None\n",
"\n",
"        for step in range(MAX_LENGTH):\n",
"            step_output, decoder_hidden = self.forward_step(decoder_input, decoder_hidden)\n",
"            step_outputs.append(step_output)\n",
"\n",
"            if teacher_forcing:\n",
"                # Feed the ground-truth token as the next input.\n",
"                decoder_input = target_tensor[:, step].unsqueeze(1)\n",
"            else:\n",
"                # Feed back the most likely token, detached from the graph.\n",
"                _, best = step_output.topk(1)\n",
"                decoder_input = best.squeeze(-1).detach()\n",
"\n",
"        decoder_outputs = F.log_softmax(torch.cat(step_outputs, dim=1), dim=-1)\n",
"        # `None` stands in for attention weights so the return shape matches the training loop.\n",
"        return decoder_outputs, decoder_hidden, None\n",
"\n",
"    def forward_step(self, input, hidden):\n",
"        \"\"\"Run one decoding step; returns (vocabulary scores, new hidden state).\"\"\"\n",
"        activated = F.relu(self.embedding(input))\n",
"        gru_output, hidden = self.gru(activated, hidden)\n",
"        return self.out(gru_output), hidden"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:21:11.447213Z",
"end_time": "2024-06-02T19:21:11.462232Z"
}
}
},
{
"cell_type": "code",
"execution_count": 18,
"outputs": [],
"source": [
"class BahdanauAttention(nn.Module):\n",
"    \"\"\"Additive attention: score = Va(tanh(Wa(query) + Ua(keys))).\"\"\"\n",
"\n",
"    def __init__(self, hidden_size):\n",
"        super().__init__()\n",
"        self.Wa = nn.Linear(hidden_size, hidden_size)\n",
"        self.Ua = nn.Linear(hidden_size, hidden_size)\n",
"        self.Va = nn.Linear(hidden_size, 1)\n",
"\n",
"    def forward(self, query, keys):\n",
"        \"\"\"Return (context, weights) for `query` attending over `keys`.\n",
"\n",
"        Assumes query is (batch, 1, hidden) and keys is (batch, seq, hidden);\n",
"        confirm against the caller.\n",
"        \"\"\"\n",
"        energy = torch.tanh(self.Wa(query) + self.Ua(keys))\n",
"        # Drop the trailing score axis and add a singleton axis at position 1 for bmm.\n",
"        scores = self.Va(energy).squeeze(2).unsqueeze(1)\n",
"\n",
"        weights = F.softmax(scores, dim=-1)\n",
"        return torch.bmm(weights, keys), weights\n",
"\n",
"class AttnDecoderRNN(nn.Module):\n",
"    \"\"\"GRU decoder that attends over the encoder outputs at every step.\"\"\"\n",
"\n",
"    def __init__(self, hidden_size, output_size, dropout_p=0.1):\n",
"        super().__init__()\n",
"        self.embedding = nn.Embedding(output_size, hidden_size)\n",
"        self.attention = BahdanauAttention(hidden_size)\n",
"        # The GRU input is the token embedding concatenated with the attention context.\n",
"        self.gru = nn.GRU(2 * hidden_size, hidden_size, batch_first=True)\n",
"        self.out = nn.Linear(hidden_size, output_size)\n",
"        self.dropout = nn.Dropout(dropout_p)\n",
"\n",
"    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):\n",
"        \"\"\"Decode MAX_LENGTH steps starting from SOS_token.\n",
"\n",
"        When `target_tensor` is given, its column i is fed as the input of step\n",
"        i + 1 (teacher forcing); otherwise the decoder feeds back its own top-1\n",
"        prediction.\n",
"\n",
"        Returns:\n",
"            (log-probabilities, final hidden state, attention weights), the\n",
"            first and last concatenated over the steps along dim 1.\n",
"        \"\"\"\n",
"        batch_size = encoder_outputs.size(0)\n",
"        decoder_input = torch.full((batch_size, 1), SOS_token, dtype=torch.long, device=device)\n",
"        decoder_hidden = encoder_hidden\n",
"        step_outputs, step_attentions = [], []\n",
"        teacher_forcing = target_tensor is not None\n",
"\n",
"        for step in range(MAX_LENGTH):\n",
"            step_output, decoder_hidden, step_attention = self.forward_step(\n",
"                decoder_input, decoder_hidden, encoder_outputs\n",
"            )\n",
"            step_outputs.append(step_output)\n",
"            step_attentions.append(step_attention)\n",
"\n",
"            if teacher_forcing:\n",
"                # Feed the ground-truth token as the next input.\n",
"                decoder_input = target_tensor[:, step].unsqueeze(1)\n",
"            else:\n",
"                # Feed back the most likely token, detached from the graph.\n",
"                _, best = step_output.topk(1)\n",
"                decoder_input = best.squeeze(-1).detach()\n",
"\n",
"        decoder_outputs = F.log_softmax(torch.cat(step_outputs, dim=1), dim=-1)\n",
"        attentions = torch.cat(step_attentions, dim=1)\n",
"        return decoder_outputs, decoder_hidden, attentions\n",
"\n",
"\n",
"    def forward_step(self, input, hidden, encoder_outputs):\n",
"        \"\"\"Run one attention + GRU step; returns (scores, hidden, attention weights).\"\"\"\n",
"        embedded = self.dropout(self.embedding(input))\n",
"\n",
"        # Swap the first two axes of the hidden state to use it as the attention\n",
"        # query (assumes hidden is (1, batch, hidden) - confirm).\n",
"        query = hidden.permute(1, 0, 2)\n",
"        context, attn_weights = self.attention(query, encoder_outputs)\n",
"\n",
"        gru_input = torch.cat((embedded, context), dim=2)\n",
"        gru_output, hidden = self.gru(gru_input, hidden)\n",
"        return self.out(gru_output), hidden, attn_weights"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:21:12.049305Z",
"end_time": "2024-06-02T19:21:12.073302Z"
}
}
},
{
"cell_type": "code",
"execution_count": 21,
"outputs": [],
"source": [
"def indexesFromSentence(lang, sentence):\n",
"    \"\"\"Map each space-separated word of `sentence` to its index in `lang`.\n",
"\n",
"    Raises KeyError for a word missing from lang.word2index.\n",
"    \"\"\"\n",
"    indexes = []\n",
"    for word in sentence.split(' '):\n",
"        indexes.append(lang.word2index[word])\n",
"    return indexes\n",
"\n",
"def tensorFromSentence(lang, sentence):\n",
"    \"\"\"Encode `sentence` as a single-row long tensor of word indices ending in EOS_token.\"\"\"\n",
"    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]\n",
"    # Wrapping the list once more yields the (1, length) shape directly.\n",
"    return torch.tensor([token_ids], dtype=torch.long, device=device)\n",
"\n",
"def tensorsFromPair(pair):\n",
"    \"\"\"Encode pair[0] with input_lang and pair[1] with output_lang; returns both tensors.\"\"\"\n",
"    return (\n",
"        tensorFromSentence(input_lang, pair[0]),\n",
"        tensorFromSentence(output_lang, pair[1]),\n",
"    )\n",
"\n",
"def get_dataloader(batch_size):\n",
"    \"\"\"Build the training DataLoader for the Polish -> English pairs.\n",
"\n",
"    Every sentence is encoded as word indices followed by EOS_token and stored\n",
"    in a zero-filled row of width MAX_LENGTH.\n",
"\n",
"    Returns:\n",
"        (input_lang, output_lang, train_dataloader); the loader samples\n",
"        randomly and yields (input_ids, target_ids) batches on `device`.\n",
"    \"\"\"\n",
"    input_lang, output_lang, pairs = prepareData( 'eng', 'pol', True)\n",
"\n",
"    shape = (len(pairs), MAX_LENGTH)\n",
"    input_ids = np.zeros(shape, dtype=np.int32)\n",
"    target_ids = np.zeros(shape, dtype=np.int32)\n",
"\n",
"    for row, (inp, tgt) in enumerate(pairs):\n",
"        inp_ids = indexesFromSentence(input_lang, inp)\n",
"        tgt_ids = indexesFromSentence(output_lang, tgt)\n",
"        inp_ids = inp_ids + [EOS_token]\n",
"        tgt_ids = tgt_ids + [EOS_token]\n",
"        # Only the leading slots are written; the rest of the row stays 0.\n",
"        input_ids[row, :len(inp_ids)] = inp_ids\n",
"        target_ids[row, :len(tgt_ids)] = tgt_ids\n",
"\n",
"    train_data = TensorDataset(\n",
"        torch.LongTensor(input_ids).to(device),\n",
"        torch.LongTensor(target_ids).to(device),\n",
"    )\n",
"    train_dataloader = DataLoader(\n",
"        train_data, sampler=RandomSampler(train_data), batch_size=batch_size\n",
"    )\n",
"    return input_lang, output_lang, train_dataloader"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:23:18.301396Z",
"end_time": "2024-06-02T19:23:18.321420Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Trening"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 22,
"outputs": [],
"source": [
"def train_epoch(dataloader, encoder, decoder, encoder_optimizer,\n",
"                decoder_optimizer, criterion):\n",
"    \"\"\"Run one optimization pass over `dataloader`.\n",
"\n",
"    For each (input, target) batch the encoder and decoder are run with the\n",
"    target passed to the decoder, the loss is back-propagated and both\n",
"    optimizers take a step.\n",
"\n",
"    Returns:\n",
"        The mean of the per-batch losses.\n",
"    \"\"\"\n",
"    optimizers = (encoder_optimizer, decoder_optimizer)\n",
"    running_loss = 0\n",
"\n",
"    for input_tensor, target_tensor in dataloader:\n",
"        for optimizer in optimizers:\n",
"            optimizer.zero_grad()\n",
"\n",
"        encoder_outputs, encoder_hidden = encoder(input_tensor)\n",
"        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor)\n",
"\n",
"        # Flatten to one row of scores per target token before scoring.\n",
"        vocab_size = decoder_outputs.size(-1)\n",
"        loss = criterion(decoder_outputs.view(-1, vocab_size), target_tensor.view(-1))\n",
"        loss.backward()\n",
"\n",
"        for optimizer in optimizers:\n",
"            optimizer.step()\n",
"\n",
"        running_loss += loss.item()\n",
"\n",
"    return running_loss / len(dataloader)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:23:19.166843Z",
"end_time": "2024-06-02T19:23:19.182827Z"
}
}
},
{
"cell_type": "code",
"execution_count": 23,
"outputs": [],
"source": [
"import time\n",
"import math\n",
"\n",
"def asMinutes(s):\n",
"    \"\"\"Format a duration of `s` seconds as '<minutes>m <seconds>s'.\"\"\"\n",
"    minutes = math.floor(s / 60)\n",
"    seconds = s - minutes * 60\n",
"    return '%dm %ds' % (minutes, seconds)\n",
"\n",
"def timeSince(since, percent):\n",
"    \"\"\"Return '<elapsed> (- <remaining>)' for a job started at `since`.\n",
"\n",
"    `percent` is the completed fraction of the job; the remaining time is\n",
"    extrapolated linearly from the time elapsed so far.\n",
"    \"\"\"\n",
"    elapsed = time.time() - since\n",
"    estimated_total = elapsed / (percent)\n",
"    remaining = estimated_total - elapsed\n",
"    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:23:19.675207Z",
"end_time": "2024-06-02T19:23:19.699207Z"
}
}
},
{
"cell_type": "code",
"execution_count": 24,
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"plt.switch_backend('agg')\n",
"import matplotlib.ticker as ticker\n",
"import numpy as np\n",
"\n",
"def showPlot(points):\n",
"    \"\"\"Plot the sequence `points` with a major y-axis tick every 0.2.\"\"\"\n",
"    # plt.subplots() creates the figure itself; a preceding plt.figure() call\n",
"    # only left an extra, empty figure behind.\n",
"    fig, ax = plt.subplots()\n",
"    # this locator puts ticks at regular intervals\n",
"    loc = ticker.MultipleLocator(base=0.2)\n",
"    ax.yaxis.set_major_locator(loc)\n",
"    ax.plot(points)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:23:20.120325Z",
"end_time": "2024-06-02T19:23:20.833674Z"
}
}
},
{
"cell_type": "code",
"execution_count": 25,
"outputs": [],
"source": [
"def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,\n",
"          print_every=100, plot_every=100):\n",
"    \"\"\"Train the encoder/decoder pair for `n_epochs` epochs with Adam and NLLLoss.\n",
"\n",
"    Every `print_every` epochs the averaged loss is printed together with the\n",
"    elapsed/remaining time; every `plot_every` epochs the averaged loss is\n",
"    recorded, and the recorded values are plotted once training ends.\n",
"    \"\"\"\n",
"    start = time.time()\n",
"    plot_losses = []\n",
"    print_loss_total = 0  # Reset every print_every\n",
"    plot_loss_total = 0  # Reset every plot_every\n",
"\n",
"    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)\n",
"    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)\n",
"    criterion = nn.NLLLoss()\n",
"\n",
"    for epoch in range(1, n_epochs + 1):\n",
"        epoch_loss = train_epoch(\n",
"            train_dataloader, encoder, decoder,\n",
"            encoder_optimizer, decoder_optimizer, criterion,\n",
"        )\n",
"        print_loss_total += epoch_loss\n",
"        plot_loss_total += epoch_loss\n",
"\n",
"        if epoch % print_every == 0:\n",
"            print_loss_avg = print_loss_total / print_every\n",
"            print_loss_total = 0\n",
"            progress = epoch / n_epochs\n",
"            print('%s (%d %d%%) %.4f' % (timeSince(start, progress),\n",
"                                        epoch, progress * 100, print_loss_avg))\n",
"\n",
"        if epoch % plot_every == 0:\n",
"            plot_losses.append(plot_loss_total / plot_every)\n",
"            plot_loss_total = 0\n",
"\n",
"    showPlot(plot_losses)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:23:21.920756Z",
"end_time": "2024-06-02T19:23:21.949755Z"
}
}
},
{
"cell_type": "code",
"execution_count": 98,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Reading lines...\n",
"Read 49943 sentence pairs\n",
"Trimmed to 3613 sentence pairs\n",
"Counting words...\n",
"Counted words:\n",
"pol 3070\n",
"eng 1969\n",
"0m 7s (- 2m 18s) (5 5%) 1.9851\n",
"0m 14s (- 2m 8s) (10 10%) 1.0089\n",
"0m 21s (- 1m 59s) (15 15%) 0.5189\n",
"0m 28s (- 1m 52s) (20 20%) 0.2294\n",
"0m 35s (- 1m 45s) (25 25%) 0.0961\n",
"0m 42s (- 1m 38s) (30 30%) 0.0509\n",
"0m 50s (- 1m 33s) (35 35%) 0.0355\n",
"0m 57s (- 1m 25s) (40 40%) 0.0289\n",
"1m 4s (- 1m 18s) (45 45%) 0.0249\n",
"1m 11s (- 1m 11s) (50 50%) 0.0228\n",
"1m 18s (- 1m 4s) (55 55%) 0.0207\n",
"1m 25s (- 0m 57s) (60 60%) 0.0215\n",
"1m 32s (- 0m 49s) (65 65%) 0.0249\n",
"1m 39s (- 0m 42s) (70 70%) 0.0184\n",
"1m 47s (- 0m 35s) (75 75%) 0.0172\n",
"1m 55s (- 0m 28s) (80 80%) 0.0166\n",
"2m 3s (- 0m 21s) (85 85%) 0.0163\n",
"2m 11s (- 0m 14s) (90 90%) 0.0163\n",
"2m 18s (- 0m 7s) (95 95%) 0.0176\n",
"2m 27s (- 0m 0s) (100 100%) 0.0256\n"
]
},
{
"data": {
"text/plain": "<Figure size 640x480 with 0 Axes>"
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": "<Figure size 640x480 with 1 Axes>",
"image/png": ""
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Model width (shared by encoder and decoder) and mini-batch size.\n",
"hidden_size = 256\n",
"batch_size = 64\n",
"\n",
"# Rebuilds the vocabularies and the training DataLoader from the data file.\n",
"input_lang, output_lang, train_dataloader = get_dataloader(batch_size)\n",
"\n",
"# Fresh encoder and attention decoder, sized to the two vocabularies.\n",
"encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)\n",
"decoder = AttnDecoderRNN(hidden_size, output_lang.n_words).to(device)\n",
"\n",
"# Train for 100 epochs, reporting and recording the loss every 5 epochs.\n",
"train(train_dataloader, encoder, decoder, 100, print_every=5, plot_every=5)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:00:44.619526Z",
"end_time": "2024-06-02T20:03:13.180305Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Ewaluacja"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 85,
"outputs": [],
"source": [
"def evaluate(encoder, decoder, sentence, input_lang, output_lang):\n",
"    \"\"\"Translate one normalized `sentence` without tracking gradients.\n",
"\n",
"    Returns:\n",
"        (decoded_words, decoder_attn): the predicted words, cut after the\n",
"        first EOS (rendered as '<EOS>'), and the decoder's attention output.\n",
"    \"\"\"\n",
"    with torch.no_grad():\n",
"        source = tensorFromSentence(input_lang, sentence)\n",
"\n",
"        encoder_outputs, encoder_hidden = encoder(source)\n",
"        decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)\n",
"\n",
"        # Most likely token per decoding step.\n",
"        decoded_ids = decoder_outputs.topk(1)[1].squeeze()\n",
"\n",
"        decoded_words = []\n",
"        for token_id in decoded_ids.tolist():\n",
"            if token_id == EOS_token:\n",
"                decoded_words.append('<EOS>')\n",
"                break\n",
"            decoded_words.append(output_lang.index2word[token_id])\n",
"    return decoded_words, decoder_attn"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:59:01.782695Z",
"end_time": "2024-06-02T19:59:01.811933Z"
}
}
},
{
"cell_type": "code",
"execution_count": 86,
"outputs": [],
"source": [
"def evaluateRandomly(encoder, decoder, n=10):\n",
"    \"\"\"Print `n` random pairs as source ('>'), reference ('=') and model output ('<').\"\"\"\n",
"    for _ in range(n):\n",
"        pair = random.choice(pairs)\n",
"        print('>', pair[0])\n",
"        print('=', pair[1])\n",
"        predicted_words = evaluate(encoder, decoder, pair[0], input_lang, output_lang)[0]\n",
"        print('<', ' '.join(predicted_words))\n",
"        print('')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:59:02.352827Z",
"end_time": "2024-06-02T19:59:02.374825Z"
}
}
},
{
"cell_type": "code",
"execution_count": 99,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"> utne sobie drzemke\n",
"= i m going to go take a nap\n",
"< i m going to go take a nap wallet <EOS>\n",
"\n",
"> nie jestem co do tego pewny to zalezy\n",
"= i m not sure about that it depends\n",
"< i m not sure about that it depends <EOS>\n",
"\n",
"> nie kupujemy\n",
"= we re not buying\n",
"< we re not buying <EOS>\n",
"\n",
"> nie jestem g upi\n",
"= i m not stupid\n",
"< i m not stupid <EOS>\n",
"\n",
"> jestes wymagajacy\n",
"= you re demanding\n",
"< you re demanding <EOS>\n",
"\n",
"> jestem m ody ale nie az tak\n",
"= i m young but i m not that young\n",
"< i m young but i m not that young <EOS>\n",
"\n",
"> nie jestem ubrana\n",
"= i m not dressed\n",
"< i m not dressed <EOS>\n",
"\n",
"> jestem gotowy sie z tym pogodzic\n",
"= i m ready to accept it\n",
"< i m ready to accept it <EOS>\n",
"\n",
"> jestem pewny ze ona nied ugo wroci\n",
"= i m sure that she will come back soon\n",
"< i m sure that she will come back soon <EOS>\n",
"\n",
"> w niedziele mam wolne\n",
"= i m free on sunday\n",
"< i m free on sunday <EOS>\n",
"\n"
]
}
],
"source": [
"# Switch both networks to evaluation mode before sampling translations.\n",
"encoder.eval()\n",
"decoder.eval()\n",
"evaluateRandomly(encoder, decoder)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:03:19.348154Z",
"end_time": "2024-06-02T20:03:19.572157Z"
}
}
},
{
"cell_type": "code",
"execution_count": 88,
"outputs": [],
"source": [
"def showAttention(input_sentence, output_words, attentions):\n",
"    \"\"\"Draw the attention matrix as a heat map with word labels on both axes.\n",
"\n",
"    Args:\n",
"        input_sentence: space-separated source sentence; its words plus\n",
"            '<EOS>' label the columns.\n",
"        output_words: decoded words; they label the rows.\n",
"        attentions: 2-D tensor of attention weights (presumably one row per\n",
"            output word and one column per input token - confirm with caller).\n",
"    \"\"\"\n",
"    fig = plt.figure()\n",
"    ax = fig.add_subplot(111)\n",
"    cax = ax.matshow(attentions.cpu().numpy(), cmap='bone')\n",
"    fig.colorbar(cax)\n",
"\n",
"    # Fix the tick positions first and then label them. Calling\n",
"    # set_*ticklabels() without fixed ticks triggers a Matplotlib UserWarning\n",
"    # and needed a dummy '' label to line the words up.\n",
"    x_labels = input_sentence.split(' ') + ['<EOS>']\n",
"    ax.set_xticks(list(range(len(x_labels))))\n",
"    ax.set_xticklabels(x_labels, rotation=90)\n",
"    ax.set_yticks(list(range(len(output_words))))\n",
"    ax.set_yticklabels(output_words)\n",
"\n",
"    plt.show()\n",
"\n",
"\n",
"def evaluateAndShowAttention(input_sentence):\n",
"    \"\"\"Normalize `input_sentence`, translate it, print both and plot the attention.\"\"\"\n",
"    normalized = normalizeString(input_sentence)\n",
"    output_words, attentions = evaluate(encoder, decoder, normalized, input_lang, output_lang)\n",
"    print('input =', normalized)\n",
"    print('output =', ' '.join(output_words))\n",
"    # Keep only the attention rows of the words that were actually emitted.\n",
"    n_steps = len(output_words)\n",
"    showAttention(normalized, output_words, attentions[0, :n_steps, :])"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T19:59:04.218821Z",
"end_time": "2024-06-02T19:59:04.250855Z"
}
}
},
{
"cell_type": "code",
"execution_count": 100,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"input = nie jestem katoliczka\n",
"output = i m not catholic <EOS>\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\adamw\\AppData\\Local\\Temp\\ipykernel_17652\\691622281.py:8: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
" ax.set_xticklabels([''] + input_sentence.split(' ') +\n",
"C:\\Users\\adamw\\AppData\\Local\\Temp\\ipykernel_17652\\691622281.py:10: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
" ax.set_yticklabels([''] + output_words)\n"
]
},
{
"data": {
"text/plain": "<Figure size 640x480 with 2 Axes>",
"image/png": ""
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Translate a single sentence and plot its attention weights.\n",
"evaluateAndShowAttention('Nie jestem katoliczką')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:03:24.422192Z",
"end_time": "2024-06-02T20:03:24.634214Z"
}
}
},
{
"cell_type": "code",
"execution_count": 101,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"input = przykro nam ze to sie zdarzy o\n",
"output = we re sorry that it happened <EOS>\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\adamw\\AppData\\Local\\Temp\\ipykernel_17652\\691622281.py:8: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
" ax.set_xticklabels([''] + input_sentence.split(' ') +\n",
"C:\\Users\\adamw\\AppData\\Local\\Temp\\ipykernel_17652\\691622281.py:10: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
" ax.set_yticklabels([''] + output_words)\n"
]
},
{
"data": {
"text/plain": "<Figure size 640x480 with 2 Axes>",
"image/png": ""
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Translate a single sentence and plot its attention weights.\n",
"evaluateAndShowAttention('Przykro nam ze to sie zdarzyło')\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:03:25.856941Z",
"end_time": "2024-06-02T20:03:26.205536Z"
}
}
},
{
"cell_type": "code",
"execution_count": 102,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"input = on mowi p ynnie po francusku\n",
"output = he is fluent in french <EOS>\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\adamw\\AppData\\Local\\Temp\\ipykernel_17652\\691622281.py:8: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
" ax.set_xticklabels([''] + input_sentence.split(' ') +\n",
"C:\\Users\\adamw\\AppData\\Local\\Temp\\ipykernel_17652\\691622281.py:10: UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.\n",
" ax.set_yticklabels([''] + output_words)\n"
]
},
{
"data": {
"text/plain": "<Figure size 640x480 with 2 Axes>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAikAAAHECAYAAAD8obrfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA7FElEQVR4nO3df3zNdf/H8efZ2Cb7ZcY2LEsIZX5NokRdfqVLXPohuWwUlSxGXSKFXGUuV37UNVEi+q3SVQoTYxFKFPqh/IqNbH5l83Nj53z/cO18O23TtrOzz/mc87i7fW45n/P5nM/rY7HXXq/3+/2x2Gw2mwAAANyMj9EBAAAAFIckBQAAuCWSFAAA4JZIUgAAgFsiSQEAAG6JJAUAALglkhQAAOCWSFIAAIBbIkkBAABuiSQFAAC4JZIUAADglkhSAACAW6pidAAAANe66qqrZLFYSnx/3759lRgNUHokKQDg4ZKSkhxeX7hwQd9++61SU1P1j3/8w5iggFKw2Gw2m9FBAAAq3+zZs7Vlyxa99tprRocCFIskBQC81L59+9SyZUvl5uYaHQpQLAbOAoCX+uCDDxQWFmZ0GECJGJMCAB6uVatWDgNnbTabsrKydPToUb300ksGRgZcHkkKAHi4Pn36OLz28fFRrVq11LlzZzVp0sSYoIBSYEwKAHi4gwcPql69esW+9+WXX+qGG26o5IiA0mFMCgB4uG7duunEiRNF9m/YsEE9evQwICKgdEhSAMDD3XDDDerWrZtOnTpl37du3Tr17NlTEydONDAy4PJo9wCAh7Narbrrrrt04sQJrVy5Uhs3btQdd9yhZ599ViNHjjQ6PKBEJCkA4AXy8/N1++236+zZs9qxY4eSk5OVmJhodFjAZZGkAIAH2rFjR5F9p06dUv/+/XX77bdr2LBh9v2xsbGVGRpQaiQpAOCBfHx8ZLFY9Pt/4n//uvD3FotFBQUFRoUJXBbrpACAB/rll1+MDgFwGpUUAADglpiCDAAebtGiRVq2bJn99ZgxYxQaGqoOHTrowIEDBkYGXB5JCgB4uClTpqhatWqSpE2bNiklJUXTpk1TeHi4Ro0aZXB0Fa+goEA7duzQxYsXjQ4FTiJJAQAPl5mZqYYNG0qSPvroI91111168MEHlZycrPXr1xscXcX75JNP1KpVKy1evNjoUOAkkhQA8HCBgYE6fvy4JOmzzz5T165dJUkBAQE6d+6ckaG5xKJFi1SrVi0tXLjQ6FDgJGb3AICH69q1q4YMGaJWrVpp165d6tmzpyTphx9+UExMjLHBVbBjx45pxYoV+uijj3THHXdc9uGKcH9UUgDAw82ePVvt27fX0aNHtWTJEtWsWVOStHXrVvXv39/g6CrWO++8o+uuu049evRQx44d9cYbbxgdEpzAFGQAgMdo06aNEhISNGLECL322muaNm2adu7caXRYKCeSFADwcOvWrbvs+zfffHMlReJa33//vdq0aaNDhw4pPDxcp0+fVkREhNasWaN27doZHR7KgSQFADycj0/Rzr7FYrH/3lOWxf/HP/6hn376SZ988ol934ABAxQcHKw5c+YYGBnKizEpAODhfvvtN4ftyJEjSk1NVdu2bfXZZ58ZHV6FKCgo0Jtvvqn4+HiH/X//+9+1ePFi5efnGxQZnMHsHgDwcCEhIUX2de3aVX5+fho9erS2bt1qQFQV68iRIxo2bJh69+7tsL979+4aPXq0srKydOWVVxoUHcqLdg8AeKmffvpJcXFxOn36tNGhAMWikgIAHm7Hjh0Or202mw4fPqypU6eqZcuWxgRVCQ4cOKAzZ86oSZMmxY7LgfujkgLgT128eFHp6enau3ev7rvvPgUFBenXX39VcHCwAgMDjQ4Pf8LHx0cWi0V//Of+hhtu0IIFC9SkSRODIqsYCxYs0MmTJzV69Gj7vgcffFDz58+XJF1zzTVauXKloqOjjQoR5USSAuCyDhw4oB49eigjI0
N5eXnatWuXGjRooJEjRyovL09z5841OkT8iT8+6djHx0e1atVSQECAQRFVrBtuuEEPPfSQBg8eLElKTU1Vr169tHDhQjVt2lSJiYlq1qyZXn31VYMjRVnR7gFwWSNHjlRcXJy2b99uX6lUkv72t79p6NChBkaG0qpfv77RIbjU7t27FRcXZ3/98ccfq3fv3howYICkS0+BLkxgYC4kKXCJsLAw7dq1S+Hh4apRo4bDmgx/dOLEiUqMDGW1fv16bdy4UX5+fg77Y2JidOjQIYOiQlmMGDFCDRs21IgRIxz2p6SkaM+ePZo1a5YxgVWQc+fOKTg42P5648aNeuCBB+yvGzRooKysLCNCg5NIUuASM2fOVFBQkP33l0tS4N6sVmuxi30dPHjQ/jWGe1uyZImWLl1aZH+HDh00depU0ycp9evX19atW1W/fn0dO3ZMP/zwg2688Ub7+1lZWcVOw4b7I0mBSyQkJNh/P2jQIOMCgdO6deumWbNm6ZVXXpF0aaXS06dPa+LEifan6cK9HT9+vNhv0sHBwTp27JgBEVWshIQEDR8+XD/88IPWrFmjJk2aqE2bNvb3N27cqOuuu87ACFFezMmCy8XHx+u1117T3r17jQ4F5TB9+nRt2LBBzZo10/nz53XffffZWz3/+te/jA4PpdCwYUOlpqYW2b9ixQo1aNDAgIgq1pgxYzR06FB9+OGHCggI0Pvvv+/w/oYNGzzuac/egtk9cLkhQ4Zo3bp12rNnj+rWratOnTqpc+fO6tSpkxo1amR0eCiFixcv6t1339WOHTt0+vRptW7dWgMGDFC1atWMDg2lsGDBAiUmJuof//iHbr31VklSWlqapk+frlmzZjEAGm6LJAWV5tChQ1q3bp0+//xzff7559q1a5eioqJ08OBBo0MDPN6cOXP03HPP6ddff5V0aeDzpEmTijzrxszOnTunVatWadeuXZKkxo0bq2vXriTTJsaYFFSaGjVqqGbNmqpRo4ZCQ0NVpUoV1apVy+iwUIylS5fqtttuU9WqVYsdcPl7d9xxRyVFBWcMGzZMw4YN09GjR1WtWjWPW4Rv6dKlGjJkSJExNuHh4Zo/f7569eplUGRwBpUUuNyTTz6p9PR0ffvtt2ratKm93XPzzTerRo0aRoeHYvj4+CgrK0u1a9e+7HLiFoul2Jk/QGXauHGjOnfurDvuuEOPPfaYmjZtKkn68ccfNX36dH366af6/PPPdcMNNxgcKcqKJAUuV7i65ahRo9S3b181btzY6JAAr5Kdna3HH39caWlpOnLkSJHl8c2eaPbs2VPR0dF6+eWXi33/oYceUmZmppYvX17JkcFZJClwue3bt+vzzz9Xenq61q9fLz8/P3s1pXPnziQtgIvddtttysjIUGJioqKiooqsW9S7d2+DIqsYYWFh+vzzz9W8efNi39+xY4c6deqk3377rZIjg7NIUlDptm/frpkzZ+qtt94qcaEwuJe0tDT7T+FWq9XhvQULFhgUFUorKChI69ev99gnHlerVk0//fRTicv/HzhwQE2aNNG5c+cqOTI4i4GzcDmbzaZvv/1W6enpSk9P1xdffKHc3FzFxsaqU6dORoeHP/HMM89o8uTJiouLK/ancLi/6OjoIi0eT9KoUSOtWbOmxOfzpKWlsdyBSZGkwOXCwsJ0+vRptWjRQp06ddLQoUPVsWNHhYaGGh0aSmHu3LlauHChBg4caHQoKKdZs2Zp7NixevnllxUTE2N0OBVu8ODBevzxxxUREVFkFeRly5ZpzJgxevLJJw2KDs6g3QOXW7ZsmTp27OjwADCYR82aNbV582ZdffXVRoeCcqpRo4bOnj2rixcv6oorrlDVqlUd3jf7Qz6tVqv69eunJUuW6JprrlHTpk1ls9m0c+dO7d69W3369NH7779/2ZlqcE8kKahUhQu31atXz+BIUFpPPPGEAgMD9fTTTxsdCspp0aJFl33/98/aMrPFixfrnXfecVjM7d5779
W9995rcGQoL5IUuJzVatWzzz6r6dOn6/Tp05IuDeR77LHHNH78eH66cXMjR47U66+/rtjYWMXGxhb5KXzGjBkGRQbA0zEmBS43fvx4zZ8/X1OnTrU/Pv2LL77QpEmTdP78eT333HMGR4jL2bFjh31WyPfff+/wHoNozef8+fPKz8932Gf2Vux7772nPn36yM/PT9Klim2dOnXsPwCdPXtWKSkpGjNmjJFhohyopMDl6tSpo7lz5xZZPv3jjz/WI488okOHDhkUGeAdzpw5oyeeeELvvfeejh8/XuR9sy8D4Ovrq8OHD6t27dqSLiVd27Ztsz/hOTs7W3Xq1DH9fXoj6uxuID8/XwcPHlRGRobD5ilOnDihJk2aFNnfpEkT0w/YA8xgzJgxWrNmjebMmSN/f3+9+uqreuaZZ1SnTh29/vrrRofntD/+rM3P3p6Ddo+Bdu/erfvvv18bN2502G+z2TzqmSgtWrRQSkqKXnzxRYf9KSkpatGihUFRobTOnDmjqVOnlriY2759+wyKrOIVFBToo48+0s6dOyVJ1157re644w75+voaHJlzPvnkE73++uvq3LmzBg8erI4dO6phw4aqX7++3nrrLQ0YMMDoEIFikaQYaNCgQapSpYo+/fRTj14ka9q0abr99tu1evVqtW/fXpK0adMmZWRkaMWKFQZHhz8zZMgQff755xo4cKBH/3+6Z88e3X777Tp48KCuueYaSVJycrKio6O1bNkyU0/BPnHihL31ERwcbK9g3nTTTRo2bJiRoQGXRZJioG3btmnr1q3FtkI8SadOnfTzzz9rzpw59p9Q+/btq0ceeUR16tQxODr8mRUrVmjZsmX2Qc+easSIEWrQoIE2bdqksLAwSdLx48f197//XSNGjNCyZcsMjrD8GjRooF9++UVXXnmlmjRpovfee0/XX3+9PvnkE49ZVHHlypUKCQmRdGlGYVpamn2g98mTJw2MDM5g4KyB2rZtq5kzZ+qmm24yOhSXO3/+vHbs2FFsu+CPA2rhXq666iotX75cTZs2NToUl6pevbq+/PLLIg+p2759u2688Ub79Hkzmjlzpnx9fTVixAitXr1avXr1ks1m04ULFzRjxgyNHDnS6BCdUpplDDyphe5NSFIMtGbNGj311FOaMmWKmjdvXmT9CbNPCyyUmpqq+Ph4HT9+vMiANv7hcH9vvvmmPv74Yy1atEhXXHGF0eG4TFhYmD799FN16NDBYf+GDRvUq1cvjxrkfeDAAW3dulUNGzZUbGys0eEAJSJJMdDvs//f9/k9beBso0aN1K1bN02YMEERERFGh4MyatWqlfbu3SubzaaYmJgiyfQ333xjUGQVKz4+Xt98843mz5+v66+/XpL01VdfaejQoWrTpo0WLlxobIDldOHCBfXo0UNz58716IfsnT17Vnv37i1SCZOkH374QfXr11dgYKABkcEZjEkx0Nq1a40OoVJkZ2dr9OjRJCgm1adPH6NDqBQvvviiEhIS1L59e3siduHCBfXu3VsvvPCCwdGVX9WqVbVjxw6jw3C5/Px8tWvXTunp6fYkU5J+/PFHtWrVShkZGSQpJkQlxWAnT57U/Pnz7QNKmzVrpgceeMA+AMwT3H///brxxhv1wAMPGB2KyxX+dfKkGTAJCQm6//771alTJ6NDqRR79uzRjz/+KOnS38eGDRsaHJHzRo0aJX9/f02dOtXoUFzqnnvuUe3atZWSkmLfN27cOG3bts1jZhIWt2Jwefj5+SkgIKACInItkhQDbdmyRT169FBAQIA98//666917tw5ffbZZ2rdurXBEVaMs2fP6u6771atWrWKHXszYsQIgyKrOPPnz9fMmTO1e/duSZdaXElJSRoyZIjBkTmvT58+Wr58uerXr6/Bgwdr0KBBHjsry1O/jo8++qhef/11NWrUSG3atFH16tUd3veU5y8tW7ZMgwYN0uHDh1WlShXZbDbVr19fzz//vO655x6jw3Pa+fPndd
VVVykrK8vpz4qMjNQvv/zi9okKSYqBChdUmjdvnqpUudR5u3jxooYMGaJ9+/Zp3bp1BkdYMebPn6+HH35YAQEBqlmzpkOVwWKxmH4xsAkTJmjGjBl69NFHHdaBSUlJ0ahRozR58mSDI3Te0aNH9cYbb2jRokX68ccf1aVLF91///3q06dPkaTTrDzt67hjxw5dd9118vHx0S233FLicRaLRWvWrKnEyFynoKBA9erV09y5c9W7d2+tXbtWd955p7KysuzP9TGz3NxchYSEKDMz06mJFbm5uYqOjlZOTo77T9CwwTABAQG2nTt3Ftn/ww8/2KpVq2ZARK4RERFhe+6552wFBQVGh+IS4eHhtrfffrvI/rfffttWs2ZNAyJyra1bt9oSExNtAQEBtvDwcFtSUpJt165dRoflNE/7Ovr4+Niys7NtNpvNdtVVV9mOHTtmcESV47HHHrP17dvXZrPZbIMHD7Y9/PDDBkdUcXJycmySbCdPnrRZrdZybydPnrRJsuXk5Bh9S3+KZ/cYKDg4uNhn9GRmZiooKMiAiFwjPz9f/fr1K9VaBmZ04cIFxcXFFdnfpk0bXbx40YCIXOfw4cNatWqVVq1aJV9fX/Xs2VPfffedmjVrppkzZxodnlM87esYGhqqX375RZK0f//+IusTeaqEhAQtX75chw4d0pIlS5SQkGB0SBXOarM5vZmFZ37XMIl+/frpgQce0OLFi5WZmanMzEy9++67GjJkiPr37290eBUmISFBixcvNjoMlxk4cKDmzJlTZP8rr7ziEc9EuXDhgpYsWaK//vWvql+/vt5//30lJSXp119/1aJFi7R69Wq99957pmuH/JGnfR3vvPNOderUSVdddZUsFovi4uLUoEGDYjdP0rx5czVr1kwDBgxQVFSUbrjhBqNDghOYgmyg559/XhaLRfHx8faf1KpWraphw4Z51Cj8goICTZs2TStXrlRsbGyRMQyeMGhv/vz5+uyzz+z/IH711VfKyMhQfHy8Ro8ebT/OjPcaFRUlq9Wq/v37a/PmzWrZsmWRY2655RaPWF7dk76Or7zyivr27as9e/ZoxIgRGjp0qEdVaC8nPj5eo0aN0rPPPmt0KC5hs9mcetKzM+dWNgbOuoHCRYgk6eqrr/a4VT09fdDe5e7v98x6r2+88Ybuvvtut58F4CxP/joOHjxYL774otckKSdOnNB//vMfPfTQQ4qMjDQ6nApTOHD22InjTg+cDQ+raYqBsyQpAACYgDcmKbR7AAAwEavt0ubM+WZBkgIAgIl405gUZve4kby8PE2aNEl5eXlGh+ISnn5/EvfoCTz9/iTuEebBmBQ3UthvNEOfsDw8/f4k7tETePr9SdyjWRXe0+GjR50ekxJVq5Yp/mxo9wAAYCLe1O4hSQEAwERIUiCr1apff/1VQUFBDg/Ec6Xc3FyH/3oaT78/iXv0BJ5+fxL36Ao2m02nTp1SnTp1PPYRIEZgTEoJDh48qOjoaKPDAACYSGZmpurVq+eSz7Y/BTkry/mnIEdGMibFzLxlZUY/v2pGh+BS1auHGB2Cy+XkHDU6BJdr27an0SG43E09uxgdgkulTHnS6BBcymazKT//bKV876Ddg0pr8RjN0+/TG8qunv41lKQqVar++UEm5x/g2T8weMP/p5L33GdlIUkBAMBEbP/75cz5ZkGSAgCAiXjTsvieXwsHAACmRCUFAAAzcXLgrBg4CwAAXMFqs8nqRKLhzLmVjXYPAABwS1RSAAAwEdZJAQAAbokkBQAAuCXGpAAAABiMSgoAACZCuwcAALglb1oWn3YPAABwS1RSAAAwEW96dg9JCgAAJmKTc+NKTJSj0O4BAADuiUoKAAAmwuweAADglrxpMTeSFAAATMSbKimMSQEAAG6JSgoAACbiTe0e01ZSOnfurKSkJKPDAACgcv2v3VPeTSQpAAAAzqHdAwCAifDsHpOwWq0aM2aMwsLCFBkZqUmTJtnfO3nypIYMGa
JatWopODhYt956q7Zv317iZ+Xl5Sk3N9dhAwDA3RQui+/MZhamTlIWLVqk6tWr66uvvtK0adM0efJkrVq1SpJ0991368iRI1qxYoW2bt2q1q1b6y9/+YtOnDhR7GclJycrJCTEvkVHR1fmrQAAgD8wdZISGxuriRMnqlGjRoqPj1dcXJzS0tL0xRdfaPPmzXr//fcVFxenRo0a6fnnn1doaKg++OCDYj9r3LhxysnJsW+ZmZmVfDcAAPw5ZwbNOrvGSmUz9ZiU2NhYh9dRUVE6cuSItm/frtOnT6tmzZoO7587d0579+4t9rP8/f3l7+/vslgBAKgI3rSYm6mTlKpVqzq8tlgsslqtOn36tKKiopSenl7knNDQ0MoJDgAAOMXUSUpJWrduraysLFWpUkUxMTFGhwMAQIVhMTeT69Kli9q3b68+ffros88+0/79+7Vx40aNHz9eW7ZsMTo8AADKjTEpJmexWLR8+XKNHz9egwcP1tGjRxUZGambb75ZERERRocHAEC5MSbFBIobb/LRRx/Zfx8UFKQXX3xRL774YuUFBQAAKoxpkxQAALyRN41JIUkBAMBEWBYfAADAYFRSAAAwEWefv2OmZ/eQpAAAYCLeNLuHdg8AAHBLVFIAADARb6qkkKQAAGAiNienIJspSaHdAwAA3BKVFAAATIR2DwAAcEs2OZdomCdFIUkBAMBUvGlZfMakAAAAt0QlBQAAE/GmZ/eQpAAAYCLetCw+7R4AAOCWqKQAAGAiTEEGAABuyZuSFNo9AADALZGkAABgIoXrpDizlcfs2bMVExOjgIAAtWvXTps3b77s8bNmzdI111yjatWqKTo6WqNGjdL58+fLdE3aPV6ualV/o0Nwqfz8sv2FgHs6ceKw0SG4XLXAakaH4FKBgaFGh+BSVqtVeXlnKuVaRrR7Fi9erNGjR2vu3Llq166dZs2ape7du+vnn39W7dq1ixz/9ttva+zYsVqwYIE6dOigXbt2adCgQbJYLJoxY0apr0slBQAAXNaMGTM0dOhQDR48WM2aNdPcuXN1xRVXaMGCBcUev3HjRt1444267777FBMTo27duql///5/Wn35I5IUAABMpLCS4swmSbm5uQ5bXl5esdfLz8/X1q1b1aVLF/s+Hx8fdenSRZs2bSr2nA4dOmjr1q32pGTfvn1avny5evbsWaZ7pd0DAICJVNSze6Kjox32T5w4UZMmTSpy/LFjx1RQUKCIiAiH/REREfrpp5+KvcZ9992nY8eO6aabbpLNZtPFixf18MMP68knnyxTrCQpAACYSEUti5+Zmang4GD7fn//ihujmJ6erilTpuill15Su3bttGfPHo0cOVL//Oc/9fTTT5f6c0hSAADwQsHBwQ5JSknCw8Pl6+ur7Oxsh/3Z2dmKjIws9pynn35aAwcO1JAhQyRJzZs315kzZ/Tggw9q/Pjx8vEp3WgTxqQAAGAiNpvzW1n4+fmpTZs2SktLs++zWq1KS0tT+/btiz3n7NmzRRIRX1/f/8Vf+gCopAAAYCI2J8eklGcK8ujRo5WQkKC4uDhdf/31mjVrls6cOaPBgwdLkuLj41W3bl0lJydLknr16qUZM2aoVatW9nbP008/rV69etmTldIgSQEAAJfVr18/HT16VBMmTFBWVpZatmyp1NRU+2DajIwMh8rJU089JYvFoqeeekqHDh1SrVq11KtXLz333HNlui5JCgAAJmLUs3sSExOVmJhY7Hvp6ekOr6tUqaKJEydq4sSJ5bqW/XOcOhsAAFSqipqCbAYMnAUAAG6JSgoAACZiVLvHCCQpAACYiDclKbR7AACAW6KSAgCAiXjTwFmSFAAATKSint1jBiQpAACYSHmWtv/j+WbBmBQAAOCWqKQAAGAijEkBAABuySbnphGbJ0Wh3QMAANwUlRQAAEyEdg8AAHBLrDhrYp07d1ZSUpLRYQAAACd5XCXlww8/VNWqVY0OAwAAl/CmSorHJSlhYWFGhw
AAgOt40WpuHt3ueemll9SoUSMFBAQoIiJCd911l7HBAQCAUvO4SkqhLVu2aMSIEXrjjTfUoUMHnThxQuvXry/x+Ly8POXl5dlf5+bmVkaYAACUic1qk83qRLvHiXMrm8cmKRkZGapevbr++te/KigoSPXr11erVq1KPD45OVnPPPNMJUYIAEA5ONntMdNqbh7X7inUtWtX1a9fXw0aNNDAgQP11ltv6ezZsyUeP27cOOXk5Ni3zMzMSowWAIDSKRw468xmFh6bpAQFBembb77RO++8o6ioKE2YMEEtWrTQyZMniz3e399fwcHBDhsAADCOxyYpklSlShV16dJF06ZN044dO7R//36tWbPG6LAAACg3b6qkeOyYlE8//VT79u3TzTffrBo1amj58uWyWq265pprjA4NAIByY50UDxAaGqoPP/xQkyZN0vnz59WoUSO98847uvbaa40ODQAAlILHJSnp6enF/h4AAE/AFGQAAOCWvKnd49EDZwEAgHlRSQEAwES8qZJCkgIAgJnwgEEAAABjUUkBAMBEvKiQQpICAICZ2GxOTkE2UZZCkgIAgIl408BZxqQAAAC3RCUFAAAT8aZKCkkKAAAm4k1JCu0eAADglqikAABgIt5USSFJAQDATKySnHmSsbXCInE52j0AAMAtUUkBAMBEaPcAAAC35E3L4tPuAQAAbolKCgAAJkK7BwAAuCWSFAAA4JZsViefguzM9OVKRpLi5apXDzE6BDgpP++c0SG4XM2adY0OweUyfsowOgSXOnnyiNEhuJSZqhNmQpICAICZONnuMdP0HpIUAABMxJvGpDAFGQAAuCUqKQAAmIg3VVJIUgAAMBMvWnKWdg8AAHBLVFIAADARm/XS5sz5ZkGSAgCAidjk5JgU0e4BAABwCpUUAABMhNk9AADALZGkAAAAt+RNSQpjUgAAgFuikgIAgInYrDbZrE5UUpw4t7KRpAAAYCasOAsAAPD/Zs+erZiYGAUEBKhdu3bavHnzZY8/efKkhg8frqioKPn7+6tx48Zavnx5ma5JJQUAABMxYuDs4sWLNXr0aM2dO1ft2rXTrFmz1L17d/3888+qXbt2kePz8/PVtWtX1a5dWx988IHq1q2rAwcOKDQ0tEzXJUkBAMBEjOj2zJgxQ0OHDtXgwYMlSXPnztWyZcu0YMECjR07tsjxCxYs0IkTJ7Rx40ZVrVpVkhQTE1Pm69LuAQDAC+Xm5jpseXl5xR6Xn5+vrVu3qkuXLvZ9Pj4+6tKlizZt2lTsOUuXLlX79u01fPhwRURE6LrrrtOUKVNUUFBQphhJUgAAMJHCdo8zmyRFR0crJCTEviUnJxd7vWPHjqmgoEAREREO+yMiIpSVlVXsOfv27dMHH3yggoICLV++XE8//bSmT5+uZ599tkz3SrsHAAATqagpyJmZmQoODrbv9/f3dzq2QlarVbVr19Yrr7wiX19ftWnTRocOHdK///1vTZw4sdSfQ5ICAIAXCg4OdkhSShIeHi5fX19lZ2c77M/OzlZkZGSx50RFRalq1ary9fW172vatKmysrKUn58vPz+/UsVIuwcAABOpqHZPafn5+alNmzZKS0uz77NarUpLS1P79u2LPefGG2/Unj17ZLVa7ft27dqlqKioUicokouTFJvNpgcffFBhYWGyWCwKDQ1VUlKSKy8JAIBHuzS7x5kkpezXHD16tObNm6dFixZp586dGjZsmM6cOWOf7RMfH69x48bZjx82bJhOnDihkSNHateuXVq2bJmmTJmi4cOHl+m6Lm33pKamauHChUpPT1eDBg101113ufJyJYqJiVFSUhIJEgDA9IxYJ6Vfv346evSoJkyYoKysLLVs2VKpqan2wbQZGRny8fn/ukd0dLRWrlypUaNGKTY2VnXr1tXIkSP1xBNPlOm6Lk1S9u7dq6ioKHXo0OHSxaowBAYAADNKTExUYmJise+lp6cX2de+fXt9+eWXTl3TZe2eQYMG6dFHH1
VGRoYsFkuxi7hYLBZ99NFHDvtCQ0O1cOFC++vMzEzdc889Cg0NVVhYmHr37q39+/c7XKdPnz56/vnnFRUVpZo1a2r48OG6cOGCJKlz5846cOCARo0aJYvFIovF4oK7BQCgclT2mBQjuSxJeeGFFzR58mTVq1dPhw8f1tdff13mz7hw4YK6d++uoKAgrV+/Xhs2bFBgYKB69Oih/Px8+3Fr167V3r17tXbtWi1atEgLFy60Jzoffvih6tWrp8mTJ+vw4cM6fPhwsdfKy8srsrANAABux2pzfjMJlyUpISEhCgoKkq+vryIjI1WrVq0yf8bixYtltVr16quvqnnz5mratKlee+01ZWRkOJSWatSooZSUFDVp0kR//etfdfvtt9tHIYeFhcnX11dBQUGKjIwscbpUcnKyw6I20dHR5bpvAABQMdx6CvL27du1Z88eBQUFKTAwUIGBgQoLC9P58+e1d+9e+3HXXnutw1zsqKgoHTlypEzXGjdunHJycuxbZmZmhd0HAAAVxab/f35PuTajb6AMDB3JarFYivTGCseSSNLp06fVpk0bvfXWW0XO/X1lpvDhRb//3N/PzS4Nf3//Cl1tDwAAl3B2XImJxqQYmqTUqlXLYYzI7t27dfbsWfvr1q1ba/Hixapdu3apVsUriZ+fX5kfagQAAIxlaLvn1ltvVUpKir799ltt2bJFDz/8sENVZMCAAQoPD1fv3r21fv16/fLLL0pPT9eIESN08ODBUl8nJiZG69at06FDh3Ts2DFX3AoAAJWC2T2VZPr06YqOjlbHjh1133336fHHH9cVV1xhf/+KK67QunXrdOWVV6pv375q2rSpHnjgAZ0/f75MlZXJkydr//79uvrqq8s1gBcAAHdR+IBBZzazsNjMlFJVotzcXIWEhBgdhstFRMQYHQKcdPK37D8/yOTaXn+70SG4XNNWrYwOwaVem/OM0SG4lM1mU0HBBeXk5Dg1POFyCr8vjXkuRf4B1cr9OXnnz2na+ESXxlpRWAIWAAATMWJZfKOQpAAAYCIkKQAAwD0VLnjizPkm4daLuQEAAO9FJQUAABOh3QMAANySzXppc+Z8s6DdAwAA3BKVFAAATIR2DwAAcEvelKTQ7gEAAG6JSgoAACbiTZUUkhQAAEzEm5IU2j0AAMAtUUkBAMBEbFabbFYnKilOnFvZSFIAADARb2r3kKQAAGAqTj5gUOZJUhiTAgAA3BKVFAAATMTmZCHFRN0ekhQAAMzkUpLizJiUCgzGxWj3AAAAt0QlxctlZ+83OgSXslg8Pw+3mem56+W0ceN/jQ7B5U6cOGx0CC71381fGh2CS509fVr9br65Uq7FFGQAAOCWvGkKsuf/mAkAAEyJSgoAACbiTZUUkhQAAMzEySTFTNN7aPcAAAC3RCUFAAAz8aLV3EhSAAAwEaYgAwAAt+RFhRTGpAAAAPdEJQUAABNhCjIAAHBL3pSk0O4BAABuiUoKAAAm4k2VFJIUAABMxJumINPuAQAAbolKCgAAJkK7BwAAuCknV3OTeZIU2j0AAMAtUUkBAMBEaPcAAAC35E3P7iFJAQDARJiCDAAAYDAqKQAAmAhjUgAAgFvypiTFY9o9nTt3VlJSktFhAACACuIxlZQPP/xQVatWNToMAABcypsqKR6TpISFhRkdAgAALndpCrIzSUoFBuNiHtnuiYmJ0ZQpU3T//fcrKChIV155pV555RVjAwQAAGXiMUnKH02fPl1xcXH69ttv9cgjj2jYsGH6+eefSzw+Ly9Pubm5DhsAAO6mcJ0UZzaz8NgkpWfPnnrkkUfUsGFDPfHEEwoPD9fatWtLPD45OVkhISH2LTo6uhKjBQCglAqXnHVmMwmPTVJiY2Ptv7dYLIqMjNSRI0dKPH7cuHHKycmxb5mZmZURJgAAKIHHDJz9oz/O9LFYLLJarSUe7+/vL39/f1eHBQCAU3h2DwAAcEveNAXZY9s9AAB4pP8lKeXdyltKmT17tm
JiYhQQEKB27dpp8+bNpTrv3XfflcViUZ8+fcp8TZIUAABwWYsXL9bo0aM1ceJEffPNN2rRooW6d+9+2bGekrR//349/vjj6tixY7mu6zHtnvT0dPvv9+/fX+T9bdu2VVosAAC4irPTiMtz7owZMzR06FANHjxYkjR37lwtW7ZMCxYs0NixY4s9p6CgQAMGDNAzzzyj9evX6+TJk2W+LpUUAABMxJlWz+/Hs/xxbbC8vLxir5efn6+tW7eqS5cu9n0+Pj7q0qWLNm3aVGKckydPVu3atfXAAw+U+15JUgAA8ELR0dEO64MlJycXe9yxY8dUUFCgiIgIh/0RERHKysoq9pwvvvhC8+fP17x585yK0WPaPQAAeAObnJzdo0vnZmZmKjg42L6/opbhOHXqlAYOHKh58+YpPDzcqc8iSQEAwEQqagpycHCwQ5JSkvDwcPn6+io7O9thf3Z2tiIjI4scv3fvXu3fv1+9evWy7ytcp6xKlSr6+eefdfXVV5cqVto9AACgRH5+fmrTpo3S0tLs+6xWq9LS0tS+ffsixzdp0kTfffedtm3bZt/uuOMO3XLLLdq2bVuZHjtDJQUAADMxYMnZ0aNHKyEhQXFxcbr++us1a9YsnTlzxj7bJz4+XnXr1lVycrICAgJ03XXXOZwfGhoqSUX2/xmSFAAATMRmvbQ5c35Z9evXT0ePHtWECROUlZWlli1bKjU11T6YNiMjQz4+Fd+cIUkBAAB/KjExUYmJicW+9/u1yoqzcOHCcl2TJAUAABPxpmf3kKQAAGAiJCkAAMAteVOSwhRkAADglqikAABgIt5USSFJAQDARIx4CrJRaPcAAAC3RCUFAAAzMWDFWaOQpAAAYCK2//1y5nyzoN0DAADcEpUUAABMhNk9AADALV1KUsr/hEGSFMBNOPMXGe7Dai0wOgSX27lzk9EhuNRtLVoYHYJL5ebmGh2CRyJJAQDARGj3AAAAt0SSAgAA3JI3JSlMQQYAAG6JSgoAACZis1mdnN1jngkFJCkAAJiJFy2LT7sHAAC4JSopAACYiDc9u4ckBQAAU3Fudo9MlKTQ7gEAAG6JSgoAACbiTeukkKQAAGAi3jQFmXYPAABwS1RSAAAwEdo9AADALZGkAAAAt+RNSQpjUgAAgFuikgIAgJl40bN7SFIAADCRS4viOzEFmRVnAQAAnEMlBQAAE/GmgbMkKQAAmIg3JSm0ewAAgFuq0CTFZrPpwQcfVFhYmCwWi7Zt21aRH19qCxcuVGhoqCHXBgDAlQorKc5sZlGh7Z7U1FQtXLhQ6enpatCggcLDwyvy4wEA8Hre9IDBCk1S9u7dq6ioKHXo0KHY9/Pz8+Xn51eRlwQAwKswJqUcBg0apEcffVQZGRmyWCyKiYlR586dlZiYqKSkJIWHh6t79+6SpO+//1633XabAgMDFRERoYEDB+rYsWP2z+rcubNGjBihMWPGKCwsTJGRkZo0aZLD9U6ePKmHHnpIERERCggI0HXXXadPP/3U4ZiVK1eqadOmCgwMVI8ePXT48OGKul0AAOBiFZakvPDCC5o8ebLq1aunw4cP6+uvv5YkLVq0SH5+ftqwYYPmzp2rkydP6tZbb1WrVq20ZcsWpaamKjs7W/fcc4/D5y1atEjVq1fXV199pWnTpmny5MlatWqVJMlqteq2227Thg0b9Oabb+rHH3/U1KlT5evraz//7Nmzev755/XGG29o3bp1ysjI0OOPP15i/Hl5ecrNzXXYAABwN4xJKYeQkBAFBQXJ19dXkZGR9v2NGjXStGnT7K+fffZZtWrVSlOmTLHvW7BggaKjo7Vr1y41btxYkhQbG6uJEyfaPyMlJUVpaWnq2rWrVq9erc2bN2vnzp324xs0aOAQz4ULFzR37lxdffXVkqTExERNnjy5xPiTk5P1zDPPOPmnAACAi3nRsvgun4Lcpk0bh9fbt2/X2rVrFRgYaN+aNGki6dKYlkKxsbEO50VFRenIkSOSpG
3btqlevXr2BKU4V1xxhT1B+eP5xRk3bpxycnLsW2ZmZulvEgAAVDiXL+ZWvXp1h9enT59Wr1699K9//avIsVFRUfbfV61a1eE9i8Uiq/XSiORq1ar96XWLO/9yJS5/f3/5+/v/6ecCAGAk2/9+OXO+WVT6irOtW7fWkiVLFBMToypVynf52NhYHTx40KE9BACAN/CmKciVvuLs8OHDdeLECfXv319ff/219u7dq5UrV2rw4MEqKCgo1Wd06tRJN998s+68806tWrVKv/zyi1asWKHU1FQXRw8AACpLpScpderU0YYNG1RQUKBu3bqpefPmSkpKUmhoqHx8Sh/OkiVL1LZtW/Xv31/NmjXTmDFjSp3kAABgVt40u8diM1O0lSg3N1chISFGhwHAS1gsnv0otQsXLxgdgkvl5uYqrEYN5eTkKDg42GXXCAkJUVzcbapSpeqfn1CCixcvaMuWFS6NtaJ49t8KAABgWpU+cBYAAJSfNy2LT5ICAICpODe7RzLP7B6SFAAATMSbKimMSQEAAG6JSgoAAGbiRc/uIUkBAMBEbHJuaXvzpCi0ewAAgJuikgIAgIl408BZkhQAAEyEBwwCAAAYjEoKAAAmQrsHAAC4JW9KUmj3AAAAt0QlBQAAE6GSAgAA3FJhkuLMVh6zZ89WTEyMAgIC1K5dO23evLnEY+fNm6eOHTuqRo0aqlGjhrp06XLZ40tCkgIAgJnYrM5vZbR48WKNHj1aEydO1DfffKMWLVqoe/fuOnLkSLHHp6enq3///lq7dq02bdqk6OhodevWTYcOHSrTdS02M9V9KlFubq5CQkKMDgOAl7BYPPtnxgsXLxgdgkvl5uYqrEYN5eTkKDg42GXXCAkJ0bXNbpSvb/lHaxQUXNQPP24oU6zt2rVT27ZtlZKSIkmyWq2Kjo7Wo48+qrFjx5bimgWqUaOGUlJSFB8fX+pYPftvBQAAHsZWAb+kS0nP77e8vLxir5efn6+tW7eqS5cu9n0+Pj7q0qWLNm3aVKqYz549qwsXLigsLKxM90qSAgCAiVTUmJTo6GiFhITYt+Tk5GKvd+zYMRUUFCgiIsJhf0REhLKyskoV8xNPPKE6deo4JDqlweweAAC8UGZmpkO7x9/f3yXXmTp1qt59912lp6crICCgTOeSpACAGzDT81TKw9fHswv3lXl/FTUFOTg4uFRjUsLDw+Xr66vs7GyH/dnZ2YqMjLzsuc8//7ymTp2q1atXKzY2tsyxevb/NQAAeJjCBww6s5WFn5+f2rRpo7S0NPs+q9WqtLQ0tW/fvsTzpk2bpn/+859KTU1VXFxcue6VSgoAALis0aNHKyEhQXFxcbr++us1a9YsnTlzRoMHD5YkxcfHq27duvZxLf/61780YcIEvf3224qJibGPXQkMDFRgYGCpr0uSAgCAiRix4my/fv109OhRTZgwQVlZWWrZsqVSU1Ptg2kzMjLk87uW15w5c5Sfn6+77rrL4XMmTpyoSZMmlfq6rJNSAtZJAYCK4+nfagq/Z1TGOimNGsU5vU7K7t1bXBprRWFMCgAAcEu0ewAAMBFvesAgSQoAAGZik+RMomGeHIUkBQAAM7HJKpssTp1vFoxJAQAAbolKCgAAJsKYFAAA4KacS1LMNCiFdg8AAHBLVFIAADAR2j0AAMAtXXpIoBOze0z0xG3aPQAAwC1RSQEAwERo9wAAALfkTUkK7R4AAOCWqKQAAGAmNpuTz+4xTyWFJAUAABOx/e+XM+ebBUkKAAAmwhRkAAAAg1FJAQDARLxpdg9JCgAAJuJNSYpL2z0Wi6XY7d1337UfU1BQoJkzZ6p58+YKCAhQjRo1dNttt2nDhg0On1VQUKCpU6eqSZMmqlatmsLCwtSuXTu9+uqrrrwFAABgkAqvpPz222+qWrWqAgMDJUmvvfaaevTo4XBMaGiopEvZ3L333qvVq1fr3//+t/7yl78oNzdXs2fPVufOnfX++++rT58+kqRnnnlGL7
/8slJSUhQXF6fc3Fxt2bJFv/32m/1zf/31V9WuXVtVqlAgAgB4Jm+qpFTId/OLFy9q5cqVWrhwoT755BN99dVXatGihaRLCUlkZGSx57333nv64IMPtHTpUvXq1cu+/5VXXtHx48c1ZMgQde3aVdWrV9fSpUv1yCOP6O6777YfV3iNQvPmzdOcOXP097//XQkJCWrevHlF3B4AAG7Dm5IUp9o93333nR577DHVq1dP8fHxqlWrltauXVskeSjJ22+/rcaNGzskKIUee+wxHT9+XKtWrZIkRUZGas2aNTp69GiJn/fEE0/ohRde0M6dO9W6dWu1bt1aL7744mXPKZSXl6fc3FyHDQAAGKfMScrx48f1wgsvqHXr1oqLi9O+ffv00ksv6fDhw3rppZfUvn17h+P79++vwMBAhy0jI0OStGvXLjVt2rTY6xTu37VrlyRpxowZOnr0qCIjIxUbG6uHH35YK1ascDgnICBA/fr107Jly3To0CHFx8dr4cKFqlu3rvr06aP//ve/unjxYrHXS05OVkhIiH2Ljo4u6x8NAAAud6mSYnVi8+BKyn/+8x8lJSUpMDBQe/bs0X//+1/17dtXfn5+xR4/c+ZMbdu2zWGrU6eO/f3S/mE1a9ZM33//vb788kvdf//9OnLkiHr16qUhQ4YUe3zt2rWVlJSkb775Rh9//LE2bdqkvn376vvvvy/2+HHjxiknJ8e+ZWZmliouAAAqVeGy+M5sJlHmMSkPPvigqlSpotdff13XXnut7rzzTg0cOFCdO3eWj0/RnCcyMlINGzYs9rMaN26snTt3Fvte4f7GjRvb9/n4+Kht27Zq27atkpKS9Oabb2rgwIEaP368rrrqKofzT506pQ8++EBvvPGG1q1bp06dOikhIUHNmjUr9nr+/v7y9/cv1Z8BAABwvTJXUurUqaOnnnpKu3btUmpqqvz8/NS3b1/Vr19fY8eO1Q8//FDqz7r33nu1e/duffLJJ0Xemz59umrWrKmuXbuWeH5hwnHmzBlJl6Ypr1ixQvfdd58iIiI0depU/eUvf9G+ffuUlpam+Pj4Eis+AACYga0CfpmFUwNnO3TooJdffllZWVn697//rW3btqlFixb67rvv7MecPHlSWVlZDlthUnHvvffqb3/7mxISEjR//nzt379fO3bs0EMPPaSlS5fq1VdfVfXq1SVJd911l2bOnKmvvvpKBw4cUHp6uoYPH67GjRurSZMmkqQpU6aof//+CgoK0urVq/Xzzz9r/PjxuvLKK525TQAA3Ebh7B5nNrOw2Co42l9//VWBgYEKDg6WxVL8A5CSk5M1duxYSZemL8+aNUsLFy7U7t27FRAQoPbt2+vpp5/WjTfeaD9n3rx5euedd/T9998rJydHkZGRuvXWWzVp0iTVr19fkrR//35FRkYqICDA6fvIzc1VSEiI058DADDXtNfyKPyekZOTo+DgYJdeIzy8XrHDK0rLarXq2LGDLo21olR4kuIpSFIAoOJ4+rcakhTXYGlWAABMxJsWcyNJAQDARLwpSXHpAwYBAADKi0oKAAAm4k2VFJIUAABMxdlpxOZJUmj3AAAAt0QlBQAAM7FZjT2/EpGkAABgIpeWtXdiTArtHgAAAOdQSQEAwEQuDZpldg8AAHAzJCkAAMAt2Zwc+Ors+ZWJMSkAAMAtUUkBAMBELnVrnGn3VFgoLkeSAgCAiTg7psRMY1Jo9wAAALdEJQUAABPxpkoKSQoAAGbibJJhoiSFdg8AAHBLVFIAADARm6ySLE6cb55KCklKCczUswMAd5ebm2t0CC5VeH+V8b2DMSnQqVOnjA4BADxGSEiI0SFUilOnTnnNvVYGkpQS1KlTR5mZmQoKCpLFUv6yWlnk5uYqOjpamZmZCg4OrpRrViZPvz+Je/QEnn5/EvfoCjabTadOnVKdOnUq5VpGnl+ZSFJK4OPjo3r16hly7eDgYI/9h0Py/PuTuEdP4On3J3GPFa2yKigkKQ
AAwC15U5LCFGQAAOCWqKS4EX9/f02cOFH+/v5Gh+ISnn5/EvfoCTz9/iTu0exsNienIJuokmKxmSlaAAC8VG5urkJCQuTrW9WpCR02m00FBReUk5Pj9mOSaPcAAAC3RLsHAAAz8aJn95CkAABgIs4ua2+mZfFp9wAAALdEJQUAABPxptk9JCkAAJgIi7kBAAAYjEoKAAAmY6ZqiDOopAAAYAJ+fn6KjIyskM+KjIyUn59fhXyWK7HiLAAAJnH+/Hnl5+c7/Tl+fn4KCAiogIhciyQFAAC4Jdo9AADALZGkAAAAt0SSAgAA3BJJCgAAcEskKQAAwC2RpAAAALdEkgIAANzS/wEiZv6X3jsvmwAAAABJRU5ErkJggg=="
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"evaluateAndShowAttention('On mówi płynnie po francusku')\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:03:26.594026Z",
"end_time": "2024-06-02T20:03:26.838018Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### BLEU"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
"execution_count": 103,
"outputs": [],
"source": [
"def filter_rows(row):\n",
" return len(row[\"eng\"].split(' '))<MAX_LENGTH and \\\n",
" len(row[\"pol\"].split(' '))<MAX_LENGTH and \\\n",
" row[\"eng\"].startswith(eng_prefixes)\n",
"\n",
"def evaluateWithTokenization(input_sentence):\n",
" input_sentence = normalizeString(input_sentence)\n",
" output_words, attentions = evaluate(encoder, decoder, input_sentence, input_lang, output_lang)\n",
" if \"<EOS>\" in output_words:\n",
" output_words.remove(\"<EOS>\")\n",
" return output_words"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:03:29.868015Z",
"end_time": "2024-06-02T20:03:29.884050Z"
}
}
},
{
"cell_type": "code",
"execution_count": 114,
"outputs": [],
"source": [
"df = pd.read_csv(\"data/eng-pol.txt\", sep='\\t', names=[\"eng\", \"pol\", \"attribution\"])\n",
"df[\"eng\"] = df[\"eng\"].apply(normalizeString)\n",
"df[\"pol\"] = df[\"pol\"].apply(normalizeString)\n",
"df_filtered = df.apply(filter_rows, axis=1)\n",
"test_df = df[df_filtered].sample(frac=1)\n",
"test_df[\"eng_token\"] = test_df[\"eng\"].apply(lambda x: x.split())\n",
"test_df[\"eng_eval\"] = test_df[\"pol\"].apply(lambda x: evaluateWithTokenization(x))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:07:48.707058Z",
"end_time": "2024-06-02T20:08:22.246952Z"
}
}
},
{
"cell_type": "code",
"execution_count": 115,
"outputs": [],
"source": [
"references_corpus = test_df[\"eng_token\"].values.tolist()\n",
"candidate_corpus = test_df[\"eng_eval\"].values.tolist()\n",
"references_corpus = [[el] for el in references_corpus]"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:08:22.248949Z",
"end_time": "2024-06-02T20:08:22.262981Z"
}
}
},
{
"cell_type": "code",
"execution_count": 116,
"outputs": [
{
"data": {
"text/plain": "0.9301728010177612"
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"bleu_score(candidate_corpus, references_corpus)"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2024-06-02T20:08:22.264948Z",
"end_time": "2024-06-02T20:08:23.695461Z"
}
}
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}