{ "metadata": { "kernelspec": { "language": "python", "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python", "version": "3.10.13", "mimetype": "text/x-python", "codemirror_mode": { "name": "ipython", "version": 3 }, "pygments_lexer": "ipython3", "nbconvert_exporter": "python", "file_extension": ".py" }, "kaggle": { "accelerator": "nvidiaTeslaT4", "dataSources": [ { "sourceId": 8513800, "sourceType": "datasetVersion", "datasetId": 5082663 } ], "dockerImageVersionId": 30699, "isInternetEnabled": true, "language": "python", "sourceType": "notebook", "isGpuEnabled": true } }, "nbformat_minor": 4, "nbformat": 4, "cells": [ { "cell_type": "markdown", "source": [ "# Seq2Seq Fiński --> Angielski\n", "https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html" ], "metadata": {} }, { "cell_type": "code", "source": [ "from __future__ import unicode_literals, print_function, division\n", "from io import open\n", "import unicodedata\n", "import re\n", "import random\n", "\n", "import torch\n", "import torch.nn as nn\n", "from torch import optim\n", "import torch.nn.functional as F\n", "\n", "import numpy as np\n", "from torch.utils.data import TensorDataset, DataLoader, RandomSampler\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")" ], "metadata": { "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "execution": { "iopub.status.busy": "2024-05-25T14:03:55.886451Z", "iopub.execute_input": "2024-05-25T14:03:55.887266Z", "iopub.status.idle": "2024-05-25T14:04:02.514594Z", "shell.execute_reply.started": "2024-05-25T14:03:55.887232Z", "shell.execute_reply": "2024-05-25T14:04:02.513697Z" }, "trusted": true }, "execution_count": 1, "outputs": [] }, { "cell_type": "code", "source": [ "torch.cuda.device_count()" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:04:09.403445Z", "iopub.execute_input": "2024-05-25T14:04:09.403926Z", 
"iopub.status.idle": "2024-05-25T14:04:09.434533Z", "shell.execute_reply.started": "2024-05-25T14:04:09.403898Z", "shell.execute_reply": "2024-05-25T14:04:09.433678Z" }, "trusted": true }, "execution_count": 2, "outputs": [ { "execution_count": 2, "output_type": "execute_result", "data": { "text/plain": "2" }, "metadata": {} } ] }, { "cell_type": "markdown", "source": [ "### Konwersja słów na index" ], "metadata": {} }, { "cell_type": "code", "source": [ "SOS_token = 0\n", "EOS_token = 1\n", "\n", "class Lang:\n", " def __init__(self, name):\n", " self.name = name\n", " self.word2index = {}\n", " self.word2count = {}\n", " self.index2word = {0: \"SOS\", 1: \"EOS\"}\n", " self.n_words = 2 # Count SOS and EOS\n", "\n", " def addSentence(self, sentence):\n", " for word in sentence.split(' '):\n", " self.addWord(word)\n", "\n", " def addWord(self, word):\n", " if word not in self.word2index:\n", " self.word2index[word] = self.n_words\n", " self.word2count[word] = 1\n", " self.index2word[self.n_words] = word\n", " self.n_words += 1\n", " else:\n", " self.word2count[word] += 1" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:04:14.014114Z", "iopub.execute_input": "2024-05-25T14:04:14.014490Z", "iopub.status.idle": "2024-05-25T14:04:14.024526Z", "shell.execute_reply.started": "2024-05-25T14:04:14.014461Z", "shell.execute_reply": "2024-05-25T14:04:14.023673Z" }, "trusted": true }, "execution_count": 3, "outputs": [] }, { "cell_type": "markdown", "source": [ "### Normalizacja tekstu" ], "metadata": {} }, { "cell_type": "code", "source": [ "# Turn a Unicode string to plain ASCII, thanks to\n", "# https://stackoverflow.com/a/518232/2809427\n", "def unicodeToAscii(s):\n", " return ''.join(\n", " c for c in unicodedata.normalize('NFD', s)\n", " if unicodedata.category(c) != 'Mn'\n", " )\n", "\n", "# Lowercase, trim, and remove non-letter characters\n", "def normalizeString(s):\n", " s = unicodeToAscii(s.lower().strip())\n", " s = re.sub(r\"([.!?])\", r\" 
\\1\", s)\n", " s = re.sub(r\"[^a-zA-Z!?]+\", r\" \", s)\n", " return s.strip()" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:04:23.431898Z", "iopub.execute_input": "2024-05-25T14:04:23.432285Z", "iopub.status.idle": "2024-05-25T14:04:23.438688Z", "shell.execute_reply.started": "2024-05-25T14:04:23.432256Z", "shell.execute_reply": "2024-05-25T14:04:23.437569Z" }, "trusted": true }, "execution_count": 4, "outputs": [] }, { "cell_type": "markdown", "source": [ "### Wczytywanie danych (zmodyfikowane ze względu na ścieżkę w kaggle)" ], "metadata": {} }, { "cell_type": "code", "source": [
"# Modified from the tutorial: the data comes from a single file uploaded to Kaggle\n",
"def readLangs(reverse=False, path='/kaggle/input/anki-en-fin/fin.txt'):\n",
"    \"\"\"Load the sentence pairs and create the two (still empty) Lang objects.\n",
"\n",
"    Args:\n",
"        reverse: if True, flip every pair and swap the two Lang objects, so the\n",
"            Lang named \"fin\" becomes the input language.\n",
"        path: tab-separated text file to read; defaults to the Kaggle dataset path.\n",
"\n",
"    Returns:\n",
"        (input_lang, output_lang, pairs), where pairs is a list of lists of\n",
"        normalized sentences.\n",
"    \"\"\"\n",
"    print(\"Reading lines...\")\n",
"    lang1 = \"en\"\n",
"    lang2 = \"fin\"\n",
"    # Read the file and split into lines. The context manager closes the handle;\n",
"    # the previous bare open(...).read() never closed it explicitly.\n",
"    with open(path, encoding='utf-8') as data_file:\n",
"        lines = data_file.read().strip().split('\\n')\n",
"\n",
"    # Split every line into pairs and normalize; [:-1] drops the trailing\n",
"    # CC licence field of each line\n",
"    pairs = [[normalizeString(s) for s in l.split('\\t')[:-1]] for l in lines]\n",
"\n",
"    # Reverse pairs, make Lang instances\n",
"    if reverse:\n",
"        pairs = [list(reversed(p)) for p in pairs]\n",
"        input_lang = Lang(lang2)\n",
"        output_lang = Lang(lang1)\n",
"    else:\n",
"        input_lang = Lang(lang1)\n",
"        output_lang = Lang(lang2)\n",
"\n",
"    return input_lang, output_lang, pairs"
], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:12:25.385674Z", "iopub.execute_input": "2024-05-25T14:12:25.386029Z", "iopub.status.idle": "2024-05-25T14:12:25.394103Z", "shell.execute_reply.started": "2024-05-25T14:12:25.386002Z", "shell.execute_reply": "2024-05-25T14:12:25.392925Z" }, "trusted": true }, "execution_count": 14, "outputs": [] }, { "cell_type": "markdown", "source": [ "#### Ograniczenie do zdań max 10 słów, formy I am / You are / He is etc. 
bez interpunkcji" ], "metadata": {} }, { "cell_type": "code", "source": [ "MAX_LENGTH = 10\n", "\n", "eng_prefixes = (\n", " \"i am \", \"i m \",\n", " \"he is\", \"he s \",\n", " \"she is\", \"she s \",\n", " \"you are\", \"you re \",\n", " \"we are\", \"we re \",\n", " \"they are\", \"they re \"\n", ")\n", "\n", "def filterPair(p):\n", " return len(p[0].split(' ')) < MAX_LENGTH and \\\n", " len(p[1].split(' ')) < MAX_LENGTH and \\\n", " p[1].startswith(eng_prefixes)\n", "\n", "\n", "def filterPairs(pairs):\n", " return [pair for pair in pairs if filterPair(pair)]" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:12:29.729786Z", "iopub.execute_input": "2024-05-25T14:12:29.730147Z", "iopub.status.idle": "2024-05-25T14:12:29.737013Z", "shell.execute_reply.started": "2024-05-25T14:12:29.730121Z", "shell.execute_reply": "2024-05-25T14:12:29.735886Z" }, "trusted": true }, "execution_count": 15, "outputs": [] }, { "cell_type": "code", "source": [ "def prepareData(reverse=False):\n", " input_lang, output_lang, pairs = readLangs(reverse)\n", " print(\"Read %s sentence pairs\" % len(pairs))\n", " pairs = filterPairs(pairs)\n", " print(\"Trimmed to %s sentence pairs\" % len(pairs))\n", " print(\"Counting words...\")\n", " for pair in pairs:\n", " input_lang.addSentence(pair[0])\n", " output_lang.addSentence(pair[1])\n", " print(\"Counted words:\")\n", " print(input_lang.name, input_lang.n_words)\n", " print(output_lang.name, output_lang.n_words)\n", " return input_lang, output_lang, pairs\n", "\n", "input_lang, output_lang, pairs = prepareData(True)\n", "print(random.choice(pairs))" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:12:33.204103Z", "iopub.execute_input": "2024-05-25T14:12:33.204776Z", "iopub.status.idle": "2024-05-25T14:12:36.889693Z", "shell.execute_reply.started": "2024-05-25T14:12:33.204744Z", "shell.execute_reply": "2024-05-25T14:12:36.888700Z" }, "trusted": true }, "execution_count": 16, "outputs": [ { "name": 
"stdout", "text": "Reading lines...\nRead 72258 sentence pairs\nTrimmed to 5005 sentence pairs\nCounting words...\nCounted words:\nfin 3686\nen 1971\n['mina odotan joulua innolla', 'i am looking forward to christmas']\n", "output_type": "stream" } ] }, { "cell_type": "markdown", "source": [ "### Definicja modelu" ], "metadata": {} }, { "cell_type": "code", "source": [ "class EncoderRNN(nn.Module):\n", " def __init__(self, input_size, hidden_size, dropout_p=0.1):\n", " super(EncoderRNN, self).__init__()\n", " self.hidden_size = hidden_size\n", "\n", " self.embedding = nn.Embedding(input_size, hidden_size)\n", " self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)\n", " self.dropout = nn.Dropout(dropout_p)\n", "\n", " def forward(self, input):\n", " embedded = self.dropout(self.embedding(input))\n", " output, hidden = self.gru(embedded)\n", " return output, hidden" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:12:52.383787Z", "iopub.execute_input": "2024-05-25T14:12:52.384131Z", "iopub.status.idle": "2024-05-25T14:12:52.391196Z", "shell.execute_reply.started": "2024-05-25T14:12:52.384104Z", "shell.execute_reply": "2024-05-25T14:12:52.390316Z" }, "trusted": true }, "execution_count": 17, "outputs": [] }, { "cell_type": "code", "source": [ "class DecoderRNN(nn.Module):\n", " def __init__(self, hidden_size, output_size):\n", " super(DecoderRNN, self).__init__()\n", " self.embedding = nn.Embedding(output_size, hidden_size)\n", " self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)\n", " self.out = nn.Linear(hidden_size, output_size)\n", "\n", " def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):\n", " batch_size = encoder_outputs.size(0)\n", " decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)\n", " decoder_hidden = encoder_hidden\n", " decoder_outputs = []\n", "\n", " for i in range(MAX_LENGTH):\n", " decoder_output, decoder_hidden = self.forward_step(decoder_input, 
decoder_hidden)\n", " decoder_outputs.append(decoder_output)\n", "\n", " if target_tensor is not None:\n", " # Teacher forcing: Feed the target as the next input\n", " decoder_input = target_tensor[:, i].unsqueeze(1) # Teacher forcing\n", " else:\n", " # Without teacher forcing: use its own predictions as the next input\n", " _, topi = decoder_output.topk(1)\n", " decoder_input = topi.squeeze(-1).detach() # detach from history as input\n", "\n", " decoder_outputs = torch.cat(decoder_outputs, dim=1)\n", " decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)\n", " return decoder_outputs, decoder_hidden, None # We return `None` for consistency in the training loop\n", "\n", " def forward_step(self, input, hidden):\n", " output = self.embedding(input)\n", " output = F.relu(output)\n", " output, hidden = self.gru(output, hidden)\n", " output = self.out(output)\n", " return output, hidden" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:12:54.393953Z", "iopub.execute_input": "2024-05-25T14:12:54.394808Z", "iopub.status.idle": "2024-05-25T14:12:54.409000Z", "shell.execute_reply.started": "2024-05-25T14:12:54.394765Z", "shell.execute_reply": "2024-05-25T14:12:54.407827Z" }, "trusted": true }, "execution_count": 18, "outputs": [] }, { "cell_type": "code", "source": [ "class BahdanauAttention(nn.Module):\n", " def __init__(self, hidden_size):\n", " super(BahdanauAttention, self).__init__()\n", " self.Wa = nn.Linear(hidden_size, hidden_size)\n", " self.Ua = nn.Linear(hidden_size, hidden_size)\n", " self.Va = nn.Linear(hidden_size, 1)\n", "\n", " def forward(self, query, keys):\n", " scores = self.Va(torch.tanh(self.Wa(query) + self.Ua(keys)))\n", " scores = scores.squeeze(2).unsqueeze(1)\n", "\n", " weights = F.softmax(scores, dim=-1)\n", " context = torch.bmm(weights, keys)\n", "\n", " return context, weights\n", "\n", "class AttnDecoderRNN(nn.Module):\n", " def __init__(self, hidden_size, output_size, dropout_p=0.1):\n", " super(AttnDecoderRNN, 
self).__init__()\n", " self.embedding = nn.Embedding(output_size, hidden_size)\n", " self.attention = BahdanauAttention(hidden_size)\n", " self.gru = nn.GRU(2 * hidden_size, hidden_size, batch_first=True)\n", " self.out = nn.Linear(hidden_size, output_size)\n", " self.dropout = nn.Dropout(dropout_p)\n", "\n", " def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):\n", " batch_size = encoder_outputs.size(0)\n", " decoder_input = torch.empty(batch_size, 1, dtype=torch.long, device=device).fill_(SOS_token)\n", " decoder_hidden = encoder_hidden\n", " decoder_outputs = []\n", " attentions = []\n", "\n", " for i in range(MAX_LENGTH):\n", " decoder_output, decoder_hidden, attn_weights = self.forward_step(\n", " decoder_input, decoder_hidden, encoder_outputs\n", " )\n", " decoder_outputs.append(decoder_output)\n", " attentions.append(attn_weights)\n", "\n", " if target_tensor is not None:\n", " # Teacher forcing: Feed the target as the next input\n", " decoder_input = target_tensor[:, i].unsqueeze(1) # Teacher forcing\n", " else:\n", " # Without teacher forcing: use its own predictions as the next input\n", " _, topi = decoder_output.topk(1)\n", " decoder_input = topi.squeeze(-1).detach() # detach from history as input\n", "\n", " decoder_outputs = torch.cat(decoder_outputs, dim=1)\n", " decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)\n", " attentions = torch.cat(attentions, dim=1)\n", "\n", " return decoder_outputs, decoder_hidden, attentions\n", "\n", "\n", " def forward_step(self, input, hidden, encoder_outputs):\n", " embedded = self.dropout(self.embedding(input))\n", "\n", " query = hidden.permute(1, 0, 2)\n", " context, attn_weights = self.attention(query, encoder_outputs)\n", " input_gru = torch.cat((embedded, context), dim=2)\n", "\n", " output, hidden = self.gru(input_gru, hidden)\n", " output = self.out(output)\n", "\n", " return output, hidden, attn_weights" ], "metadata": { "execution": { "iopub.status.busy": 
"2024-05-25T14:13:00.670299Z", "iopub.execute_input": "2024-05-25T14:13:00.670758Z", "iopub.status.idle": "2024-05-25T14:13:00.687695Z", "shell.execute_reply.started": "2024-05-25T14:13:00.670720Z", "shell.execute_reply": "2024-05-25T14:13:00.686610Z" }, "trusted": true }, "execution_count": 19, "outputs": [] }, { "cell_type": "code", "source": [ "def indexesFromSentence(lang, sentence):\n", " return [lang.word2index[word] for word in sentence.split(' ')]\n", "\n", "def tensorFromSentence(lang, sentence):\n", " indexes = indexesFromSentence(lang, sentence)\n", " indexes.append(EOS_token)\n", " return torch.tensor(indexes, dtype=torch.long, device=device).view(1, -1)\n", "\n", "def tensorsFromPair(pair):\n", " input_tensor = tensorFromSentence(input_lang, pair[0])\n", " target_tensor = tensorFromSentence(output_lang, pair[1])\n", " return (input_tensor, target_tensor)\n", "\n", "def get_dataloader(batch_size):\n", " input_lang, output_lang, pairs = prepareData(True)\n", "\n", " n = len(pairs)\n", " input_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)\n", " target_ids = np.zeros((n, MAX_LENGTH), dtype=np.int32)\n", "\n", " for idx, (inp, tgt) in enumerate(pairs):\n", " inp_ids = indexesFromSentence(input_lang, inp)\n", " tgt_ids = indexesFromSentence(output_lang, tgt)\n", " inp_ids.append(EOS_token)\n", " tgt_ids.append(EOS_token)\n", " input_ids[idx, :len(inp_ids)] = inp_ids\n", " target_ids[idx, :len(tgt_ids)] = tgt_ids\n", "\n", " train_data = TensorDataset(torch.LongTensor(input_ids).to(device),\n", " torch.LongTensor(target_ids).to(device))\n", "\n", " train_sampler = RandomSampler(train_data)\n", " train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)\n", " return input_lang, output_lang, train_dataloader" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:22:08.183866Z", "iopub.execute_input": "2024-05-25T14:22:08.184711Z", "iopub.status.idle": "2024-05-25T14:22:08.194870Z", "shell.execute_reply.started": 
"2024-05-25T14:22:08.184675Z", "shell.execute_reply": "2024-05-25T14:22:08.193965Z" }, "trusted": true }, "execution_count": 31, "outputs": [] }, { "cell_type": "code", "source": [ "def train_epoch(dataloader, encoder, decoder, encoder_optimizer,\n", " decoder_optimizer, criterion):\n", "\n", " total_loss = 0\n", " for data in dataloader:\n", " input_tensor, target_tensor = data\n", "\n", " encoder_optimizer.zero_grad()\n", " decoder_optimizer.zero_grad()\n", "\n", " encoder_outputs, encoder_hidden = encoder(input_tensor)\n", " decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor)\n", "\n", " loss = criterion(\n", " decoder_outputs.view(-1, decoder_outputs.size(-1)),\n", " target_tensor.view(-1)\n", " )\n", " loss.backward()\n", "\n", " encoder_optimizer.step()\n", " decoder_optimizer.step()\n", "\n", " total_loss += loss.item()\n", "\n", " return total_loss / len(dataloader)" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:16:38.894580Z", "iopub.execute_input": "2024-05-25T14:16:38.895410Z", "iopub.status.idle": "2024-05-25T14:16:38.902142Z", "shell.execute_reply.started": "2024-05-25T14:16:38.895382Z", "shell.execute_reply": "2024-05-25T14:16:38.900953Z" }, "trusted": true }, "execution_count": 22, "outputs": [] }, { "cell_type": "code", "source": [ "import time\n", "import math\n", "\n", "def asMinutes(s):\n", " m = math.floor(s / 60)\n", " s -= m * 60\n", " return '%dm %ds' % (m, s)\n", "\n", "def timeSince(since, percent):\n", " now = time.time()\n", " s = now - since\n", " es = s / (percent)\n", " rs = es - s\n", " return '%s (- %s)' % (asMinutes(s), asMinutes(rs))" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:16:43.069584Z", "iopub.execute_input": "2024-05-25T14:16:43.069953Z", "iopub.status.idle": "2024-05-25T14:16:43.075972Z", "shell.execute_reply.started": "2024-05-25T14:16:43.069926Z", "shell.execute_reply": "2024-05-25T14:16:43.075033Z" }, "trusted": true }, "execution_count": 23, 
"outputs": [] }, { "cell_type": "code", "source": [ "def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,\n", " print_every=100, plot_every=100):\n", " start = time.time()\n", " plot_losses = []\n", " print_loss_total = 0 # Reset every print_every\n", " plot_loss_total = 0 # Reset every plot_every\n", "\n", " encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)\n", " decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)\n", " criterion = nn.NLLLoss()\n", "\n", " for epoch in range(1, n_epochs + 1):\n", " loss = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)\n", " print_loss_total += loss\n", " plot_loss_total += loss\n", "\n", " if epoch % print_every == 0:\n", " print_loss_avg = print_loss_total / print_every\n", " print_loss_total = 0\n", " print('%s (%d %d%%) %.4f' % (timeSince(start, epoch / n_epochs),\n", " epoch, epoch / n_epochs * 100, print_loss_avg))\n", "\n", " if epoch % plot_every == 0:\n", " plot_loss_avg = plot_loss_total / plot_every\n", " plot_losses.append(plot_loss_avg)\n", " plot_loss_total = 0\n", "\n", " showPlot(plot_losses)" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:20:58.574148Z", "iopub.execute_input": "2024-05-25T14:20:58.574520Z", "iopub.status.idle": "2024-05-25T14:20:58.583203Z", "shell.execute_reply.started": "2024-05-25T14:20:58.574492Z", "shell.execute_reply": "2024-05-25T14:20:58.582230Z" }, "trusted": true }, "execution_count": 24, "outputs": [] }, { "cell_type": "code", "source": [ "import matplotlib.pyplot as plt\n", "plt.switch_backend('agg')\n", "import matplotlib.ticker as ticker\n", "import numpy as np\n", "%matplotlib inline\n", "\n", "def showPlot(points):\n", " plt.figure()\n", " fig, ax = plt.subplots()\n", " # this locator puts ticks at regular intervals\n", " loc = ticker.MultipleLocator(base=0.2)\n", " ax.yaxis.set_major_locator(loc)\n", " plt.plot(points)" ], "metadata": { "execution": 
{ "iopub.status.busy": "2024-05-25T14:21:00.586018Z", "iopub.execute_input": "2024-05-25T14:21:00.586719Z", "iopub.status.idle": "2024-05-25T14:21:00.592633Z", "shell.execute_reply.started": "2024-05-25T14:21:00.586683Z", "shell.execute_reply": "2024-05-25T14:21:00.591636Z" }, "trusted": true }, "execution_count": 25, "outputs": [] }, { "cell_type": "markdown", "source": [ "### Ewaluacja" ], "metadata": {} }, { "cell_type": "code", "source": [
"def evaluate(encoder, decoder, sentence, input_lang, output_lang):\n",
"    \"\"\"Greedily translate one already-normalized sentence.\n",
"\n",
"    Returns:\n",
"        (decoded_words, decoder_attn). decoded_words ends with '' when the\n",
"        decoder produced EOS_token; decoder_attn is whatever the decoder\n",
"        returns as its third value.\n",
"    \"\"\"\n",
"    # Remember the current modes so a later training run is not affected.\n",
"    encoder_was_training = encoder.training\n",
"    decoder_was_training = decoder.training\n",
"    # EncoderRNN and AttnDecoderRNN contain nn.Dropout; inference has to run\n",
"    # in eval mode, otherwise dropout stays active during inference.\n",
"    encoder.eval()\n",
"    decoder.eval()\n",
"    try:\n",
"        with torch.no_grad():\n",
"            input_tensor = tensorFromSentence(input_lang, sentence)\n",
"\n",
"            encoder_outputs, encoder_hidden = encoder(input_tensor)\n",
"            decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)\n",
"\n",
"            _, topi = decoder_outputs.topk(1)\n",
"            decoded_ids = topi.squeeze()\n",
"\n",
"            decoded_words = []\n",
"            for idx in decoded_ids:\n",
"                if idx.item() == EOS_token:\n",
"                    decoded_words.append('')\n",
"                    break\n",
"                decoded_words.append(output_lang.index2word[idx.item()])\n",
"    finally:\n",
"        encoder.train(encoder_was_training)\n",
"        decoder.train(decoder_was_training)\n",
"    return decoded_words, decoder_attn"
], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:21:01.858691Z", "iopub.execute_input": "2024-05-25T14:21:01.859612Z", "iopub.status.idle": "2024-05-25T14:21:01.866857Z", "shell.execute_reply.started": "2024-05-25T14:21:01.859574Z", "shell.execute_reply": "2024-05-25T14:21:01.865732Z" }, "trusted": true }, "execution_count": 26, "outputs": [] }, { "cell_type": "code", "source": [
"def evaluateRandomly(encoder, decoder, n=10):\n",
"    \"\"\"Print input, reference and model output for `n` random entries of the global `pairs`.\"\"\"\n",
"    for i in range(n):\n",
"        pair = random.choice(pairs)\n",
"        print('Input sentence: ', pair[0])\n",
"        print('Target (true) translation:' , pair[1])\n",
"        output_words, _ = evaluate(encoder, decoder, pair[0], input_lang, output_lang)\n",
"        output_sentence = ' '.join(output_words)\n",
"        print('Output sentence: ', output_sentence)\n",
"        print('')"
], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:39:52.474985Z", "iopub.execute_input": 
"2024-05-25T14:39:52.475327Z", "iopub.status.idle": "2024-05-25T14:39:52.481801Z", "shell.execute_reply.started": "2024-05-25T14:39:52.475304Z", "shell.execute_reply": "2024-05-25T14:39:52.480957Z" }, "trusted": true }, "execution_count": 36, "outputs": [] }, { "cell_type": "markdown", "source": [ "# Wykorzystanie zdefiniowanych wyżej funkcji" ], "metadata": {} }, { "cell_type": "markdown", "source": [ "### Trenowanie modelu" ], "metadata": {} }, { "cell_type": "code", "source": [ "hidden_size = 128\n", "batch_size = 32\n", "\n", "input_lang, output_lang, train_dataloader = get_dataloader(batch_size)\n", "\n", "encoder = EncoderRNN(input_lang.n_words, hidden_size).to(device)\n", "decoder = AttnDecoderRNN(hidden_size, output_lang.n_words).to(device)\n", "\n", "train(train_dataloader, encoder, decoder, 80, print_every=5, plot_every=5)" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:22:39.754370Z", "iopub.execute_input": "2024-05-25T14:22:39.754740Z", "iopub.status.idle": "2024-05-25T14:26:53.707012Z", "shell.execute_reply.started": "2024-05-25T14:22:39.754714Z", "shell.execute_reply": "2024-05-25T14:26:53.705969Z" }, "trusted": true }, "execution_count": 32, "outputs": [ { "name": "stdout", "text": "Reading lines...\nRead 72258 sentence pairs\nTrimmed to 5005 sentence pairs\nCounting words...\nCounted words:\nfin 3686\nen 1971\n0m 21s (- 5m 21s) (5 6%) 1.9364\n0m 36s (- 4m 17s) (10 12%) 1.0355\n0m 51s (- 3m 45s) (15 18%) 0.6313\n1m 7s (- 3m 21s) (20 25%) 0.3787\n1m 22s (- 3m 1s) (25 31%) 0.2243\n1m 37s (- 2m 42s) (30 37%) 0.1371\n1m 52s (- 2m 25s) (35 43%) 0.0903\n2m 7s (- 2m 7s) (40 50%) 0.0668\n2m 23s (- 1m 51s) (45 56%) 0.0538\n2m 38s (- 1m 34s) (50 62%) 0.0471\n2m 53s (- 1m 18s) (55 68%) 0.0410\n3m 8s (- 1m 2s) (60 75%) 0.0381\n3m 23s (- 0m 47s) (65 81%) 0.0343\n3m 38s (- 0m 31s) (70 87%) 0.0342\n3m 54s (- 0m 15s) (75 93%) 0.0322\n4m 9s (- 0m 0s) (80 100%) 0.0307\n", "output_type": "stream" } ] }, { "cell_type": "code", "source": [ 
"evaluateRandomly(encoder, decoder)" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:48:23.131435Z", "iopub.execute_input": "2024-05-25T15:48:23.131948Z", "iopub.status.idle": "2024-05-25T15:48:23.213007Z", "shell.execute_reply.started": "2024-05-25T15:48:23.131911Z", "shell.execute_reply": "2024-05-25T15:48:23.211856Z" }, "trusted": true }, "execution_count": 121, "outputs": [ { "name": "stdout", "text": "Input sentence: olen hyvin hyvin vihainen\nTarget (true) translation: i m very very angry\nOutput sentence: i am very angry today \n\nInput sentence: han valehtelee\nTarget (true) translation: he s lying\nOutput sentence: he is telling a lie \n\nInput sentence: olen myohassa\nTarget (true) translation: i m late\nOutput sentence: i m late \n\nInput sentence: han on linja autonkuljettaja\nTarget (true) translation: he is a bus driver\nOutput sentence: he is a bus driver \n\nInput sentence: mukava tavata sinut taas\nTarget (true) translation: i m glad to see you again\nOutput sentence: i m glad to see you again \n\nInput sentence: olet kuumeessa\nTarget (true) translation: you re running a fever\nOutput sentence: you re so predictable \n\nInput sentence: anteeksi mutta unohdin tehda laksyt\nTarget (true) translation: i m sorry i forgot to do my homework\nOutput sentence: i m sorry i forgot to do my homework \n\nInput sentence: mina olen tyoton\nTarget (true) translation: i m unemployed\nOutput sentence: i m unemployed \n\nInput sentence: olen taynna\nTarget (true) translation: i am full\nOutput sentence: i am full of french \n\nInput sentence: ma kuolen nalkaan !\nTarget (true) translation: i m dying of hunger\nOutput sentence: i m dying of hunger \n\n", "output_type": "stream" } ] }, { "cell_type": "code", "source": [ "def showAttention(input_sentence, output_words, attentions):\n", " fig = plt.figure()\n", " ax = fig.add_subplot(111)\n", " cax = ax.matshow(attentions.cpu().numpy(), cmap='bone')\n", " fig.colorbar(cax)\n", "\n", " # Set up 
axes\n", " ax.set_xticklabels([''] + input_sentence.split(' ') +\n", " [''], rotation=90)\n", " ax.set_yticklabels([''] + output_words)\n", "\n", " # Show label at every tick\n", " ax.xaxis.set_major_locator(ticker.MultipleLocator(1))\n", " ax.yaxis.set_major_locator(ticker.MultipleLocator(1))\n", "\n", " plt.show()\n", "\n", "\n", "def evaluateAndShowAttention(input_sentence):\n", " input_sentence = normalizeString(input_sentence)\n", " output_words, attentions = evaluate(encoder, decoder, input_sentence, input_lang, output_lang)\n", " print('input =', input_sentence)\n", " print('output =', ' '.join(output_words))\n", " showAttention(input_sentence, output_words, attentions[0, :len(output_words), :])\n", "\n", "def translate(input_sentence, tokenized=False):\n", " input_sentence = normalizeString(input_sentence)\n", " output_words, attentions = evaluate(encoder, decoder, input_sentence, input_lang, output_lang)\n", " if tokenized:\n", " if \"\" in output_words:\n", " output_words.remove(\"\")\n", " return output_words\n", " return ' '.join(output_words)" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:30:41.253325Z", "iopub.execute_input": "2024-05-25T15:30:41.253734Z", "iopub.status.idle": "2024-05-25T15:30:41.264515Z", "shell.execute_reply.started": "2024-05-25T15:30:41.253703Z", "shell.execute_reply": "2024-05-25T15:30:41.263376Z" }, "trusted": true }, "execution_count": 99, "outputs": [] }, { "cell_type": "code", "source": [ "translate(\"Meillä on nälkä\", tokenized=True)" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:58:53.639252Z", "iopub.execute_input": "2024-05-25T14:58:53.639963Z", "iopub.status.idle": "2024-05-25T14:58:53.654186Z", "shell.execute_reply.started": "2024-05-25T14:58:53.639932Z", "shell.execute_reply": "2024-05-25T14:58:53.653028Z" }, "trusted": true }, "execution_count": 76, "outputs": [ { "execution_count": 76, "output_type": "execute_result", "data": { "text/plain": "['we', 'are', 'hungry']" }, 
"metadata": {} } ] }, { "cell_type": "code", "source": [ "evaluateAndShowAttention('Olet liian naivi')" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:46:05.023218Z", "iopub.execute_input": "2024-05-25T14:46:05.024277Z", "iopub.status.idle": "2024-05-25T14:46:05.426793Z", "shell.execute_reply.started": "2024-05-25T14:46:05.024227Z", "shell.execute_reply": "2024-05-25T14:46:05.424993Z" }, "trusted": true }, "execution_count": 44, "outputs": [ { "name": "stdout", "text": "input = olet liian naivi\noutput = you re too naive \n", "output_type": "stream" }, { "name": "stderr", "text": "/tmp/ipykernel_34/2052950992.py:8: UserWarning: FixedFormatter should only be used together with FixedLocator\n ax.set_xticklabels([''] + input_sentence.split(' ') +\n/tmp/ipykernel_34/2052950992.py:10: UserWarning: FixedFormatter should only be used together with FixedLocator\n ax.set_yticklabels([''] + output_words)\n", "output_type": "stream" }, { "output_type": "display_data", "data": { "text/plain": "
", "image/png": "" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "evaluateAndShowAttention('Olen todella pahoillani')" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:46:10.394969Z", "iopub.execute_input": "2024-05-25T14:46:10.395671Z", "iopub.status.idle": "2024-05-25T14:46:10.793392Z", "shell.execute_reply.started": "2024-05-25T14:46:10.395630Z", "shell.execute_reply": "2024-05-25T14:46:10.791940Z" }, "trusted": true }, "execution_count": 45, "outputs": [ { "name": "stdout", "text": "input = olen todella pahoillani\noutput = i am truly sorry \n", "output_type": "stream" }, { "name": "stderr", "text": "/tmp/ipykernel_34/2052950992.py:8: UserWarning: FixedFormatter should only be used together with FixedLocator\n ax.set_xticklabels([''] + input_sentence.split(' ') +\n/tmp/ipykernel_34/2052950992.py:10: UserWarning: FixedFormatter should only be used together with FixedLocator\n ax.set_yticklabels([''] + output_words)\n", "output_type": "stream" }, { "output_type": "display_data", "data": { "text/plain": "
", "image/png": "" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "evaluateAndShowAttention('Olet minun isäni')" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:46:13.190403Z", "iopub.execute_input": "2024-05-25T14:46:13.191025Z", "iopub.status.idle": "2024-05-25T14:46:13.613486Z", "shell.execute_reply.started": "2024-05-25T14:46:13.190997Z", "shell.execute_reply": "2024-05-25T14:46:13.612143Z" }, "trusted": true }, "execution_count": 46, "outputs": [ { "name": "stdout", "text": "input = olet minun isani\noutput = you re my father \n", "output_type": "stream" }, { "name": "stderr", "text": "/tmp/ipykernel_34/2052950992.py:8: UserWarning: FixedFormatter should only be used together with FixedLocator\n ax.set_xticklabels([''] + input_sentence.split(' ') +\n/tmp/ipykernel_34/2052950992.py:10: UserWarning: FixedFormatter should only be used together with FixedLocator\n ax.set_yticklabels([''] + output_words)\n", "output_type": "stream" }, { "output_type": "display_data", "data": { "text/plain": "
", "image/png": "" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "evaluateAndShowAttention('Hän on opettaja')" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:46:15.840433Z", "iopub.execute_input": "2024-05-25T14:46:15.841005Z", "iopub.status.idle": "2024-05-25T14:46:16.232003Z", "shell.execute_reply.started": "2024-05-25T14:46:15.840973Z", "shell.execute_reply": "2024-05-25T14:46:16.230365Z" }, "trusted": true }, "execution_count": 47, "outputs": [ { "name": "stdout", "text": "input = han on opettaja\noutput = he is a teacher \n", "output_type": "stream" }, { "name": "stderr", "text": "/tmp/ipykernel_34/2052950992.py:8: UserWarning: FixedFormatter should only be used together with FixedLocator\n ax.set_xticklabels([''] + input_sentence.split(' ') +\n/tmp/ipykernel_34/2052950992.py:10: UserWarning: FixedFormatter should only be used together with FixedLocator\n ax.set_yticklabels([''] + output_words)\n", "output_type": "stream" }, { "output_type": "display_data", "data": { "text/plain": "
", "image/png": "iVBORw0KGgoAAAANSUhEUgAAAcUAAAHOCAYAAADpBhJHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAx5klEQVR4nO3deVxU9f7H8feAAioMriwqgpZL7lsalmmFWT2ybDUqF8qysl8p9TC95VqJWZlZloqa2i21bLmWSwuKXdMs8Wq5pOYGWihKikJCMvP7g2Fuc8UjBnhmOK8nj/O4zpmzfObE5TOf7/l+v8fmdDqdAgAA8jM7AAAAvAVJEQAAF5IiAAAuJEUAAFxIigAAuJAUAQBwISkCAOBCUgQAwIWkCACAC0kRAAAXkiIAAC4kRQAAXKqYHQAAcyxZskQffPCB0tPTVVBQ4PHepk2bTIoKMBeVImBB06ZNU0JCgsLDw/Wf//xHXbp0UZ06dbR3717deOONZocHmMbGo6MA62nRooXGjh2r+Ph4hYSEaMuWLWrSpInGjBmj7Oxsvfnmm2aHCJiCShGwoPT0dHXr1k2SVK1aNZ08eVKS1L9/fy1cuNDM0ABTkRQBC4qIiFB2drYkqVGjRvruu+8kSfv27RONR7AykiJgQddee62WLl0qSUpISNDw4cPVq1cv9evXT7fddpvJ0QHm4Z4iYEEOh0MOh0NVqhR1QF+0aJHWrVunpk2basiQIQoICDA5QsAcJEUAOI/CwkJt27ZNLVu2dH+RQOXEf13AIn788Ue1bt1afn5++vHHHw23DQ4OVlRUlKpWrXqRovNun332me644w4tWLBA9913n9nhoAJRKQIW4efnp8zMTIWFhcnPz082m82wU01oaKhmzJihfv36XcQovdNtt92m9evXq02bNvrqq6/MDgcViKQIWMSBAwfUqFEj2Ww2HThwwHDb/Px8ffjhh0pOTtb+/fsvToBe6ujRo2rYsKE+/fRT3XLLLdq7d68aNmxodlioIDSfAhYRHR1d4r/P5bHHHlNaWlpFhuQTFi5cqNatW+uGG25Q9+7d9e6772rUqFFmh4UKQqUIWFheXl6Jc5+2bdvWpIi8T6dOnTRw4EA98cQTeueddzR58mTt2LHD7LBQQUiKgAVlZWUpISFBK1asKPH9wsLCixyRd9q6das6deqkQ4cOqW7dujp16pTCw8O1atUqde3a1ezwUAEYvA9Y0LBhw3T8+HFt2LBB1apV08qVKzV//nw1bdrUPagf0vz583X99derbt26kop65fbt21fz5s0zNzBUGCpFwIIiIyP1r3/9S126dJHdbtfGjRvVrFkzLV26VJMnT9batWvNDtF0hYWFatiwoaZNm6a77rrLvX7FihW67777lJmZySQHlRCVImBBubm5CgsLkyTVqlVLWVlZkqQ2bdrwLEWXI0eO6NFHH9Wtt97qsb53795KTExUZmamSZGhIlEpAhZ0+eWX64UXXlDv3r11yy23qGbNmkpKStK0adO0ZMkS7dmzx+wQAVOQFAEL+uc//6kzZ85o0KBBSktL0w033KDs7GwFBARo3rx5DNg/hwMHDig3N1ctWrSQnx8NbZURSRGA8vLy9PPPP6tRo0buTiVWNnfuXB0/flyJiYnudQ8//LDmzJkjSWrevLm++OILRUVFmRUiKghfdQALmjBhgvLy8tyvq1evro4dO6pGjRqaMGGCiZF5h1mzZqlWrVru1ytXrtQ777yjBQsW6IcfflDNmjU1fvx4EyNERaFSBCzI399fv/32m7uzTbFjx44pLCzM8uMU69Spo9TUVLVp00aS9OijjyorK0tLliyRJKWmpiohIUH79u0zM0xUACpFwIKcTqdsNttZ67ds2aLatWubEJF3+eOPP2S3292v161bp6uvvtr9ukmTJvQ+raSY+xSwkFq1aslms8lms6lZs2YeibGwsFCnTp3SI488YmKE3iE6OlppaWm
Kjo7W0aNHtW3bNl155ZXu9zMzMxUaGmpihKgoJEXAQqZOnSqn06kHHnhA48eP9/jDHhAQoJiYGMXGxpoYoXcYOHCghg4dqm3btmnVqlVq0aKFOnXq5H5/3bp1at26tYkRoqKQFAELGThwoCSpcePGuvLKK3mK/DmMGDFCeXl5+vjjjxUREaEPP/zQ4/1vv/1W8fHxJkWHikRHG8CC6GgDlIyviYAFneu7cH5+PvN5/sUff/yhr776Srt27ZIkNWvWTL169VK1atVMjgwVhaQIWMi0adMkSTabTbNnz1ZwcLD7vcLCQn3zzTdq0aKFWeF5laVLl2rw4ME6evSox/q6detqzpw56tOnj0mRoSLRfApYSOPGjSUVTVfWsGFD+fv7u98r7mgzYcIEyz8rcN26derZs6duueUWPfXUU7rsssskSdu3b9err76qzz//XGvWrNEVV1xhcqQobyRFwIKuueYaffzxxx6ztuC/brrpJkVFRWnmzJklvj9kyBBlZGRo+fLlFzkyVDSSImBhBQUF2rdvny655BJ6ov5F7dq1tWbNGveMNv/rxx9/VI8ePfT7779f5MhQ0ZjRBrCgP/74Qw8++KCqV6+uVq1aKT09XZL0f//3f5o0aZLJ0Znvf2e0+V+hoaE6ffr0RYwIFwtJ0UccPnxY/fv3V/369VWlShX5+/t7LMCFGDlypLZs2aLU1FQFBQW518fFxWnx4sUmRuYdmjZtqlWrVp3z/ZSUFDVt2vQiRoSLhfYSHzFo0CClp6dr9OjRioyMLHHeSqC0Pv30Uy1evFhXXHGFx+9Sq1ateMCwpISEBD399NMKDw/XTTfd5PHesmXLNGLECP3jH/8wKTpUJJKij1i7dq3+/e9/q3379maHgkogKyvrrIH7kpSbm8sXLklPPvmk1q1bp5tvvlnNmzfXZZddJqfTqR07dmj37t3q27evhg0bZnaYqAA0n/qIqKiocw64Bi5U586dtWzZMvfr4kQ4e/Zs5j6V5Ofnpw8//FALFy5U8+bN9fPPP2vnzp1q0aKF3nvvPX300Ufy8+PPZ2VE71Mf8eWXX+rVV1/VzJkzFRMTY3Y48HFr167VjTfeqPvvv1/z5s3TkCFDtH37dq1bt05r1qzxmPwasBK+6viIfv36KTU1VZdccolCQkJUu3ZtjwW4EFdddZU2b96sM2fOqE2bNvryyy8VFham9evXkxAlffDBByooKHC/PnjwoBwOh/t1Xl6eJk+ebEZoqGBUij5i/vz5hu8XP/0AQNn974TpdrtdmzdvVpMmTSQV9QavX78+E6dXQnS08REkvQtTUFCgI0eOeHy7l6RGjRqZFJH3KSws1CeffKIdO3ZIklq2bKlbb72VQfw6e8J0agfr4LffB50+fdqjaUeS4UBjK9m9e7ceeOABrVu3zmO90+mUzWbjm73Ltm3bdMsttygzM1PNmzeXJL300kuqV6+ePvvsMx6gC8siKfqI3NxcPfPMM/rggw907Nixs97nj32RQYMGqUqVKvr8888Zz2lg8ODBatWqlTZu3Oie//T333/XoEGD9PDDD5/1pQKwCpKijxgxYoRWr16tt99+W/3799f06dN16NAhzZw5k2m5/mLz5s1KS0vj8UfnsXnzZo+EKEm1atXSiy++qMsvv9zEyLzHF198odDQUEmSw+FQSkqKtm7dKkk6fvy4iZGhIpEUfcRnn32mBQsWqGfPnkpISFD37t116aWXKjo6Wu+9957uu+8+s0P0Ci1btjzr+Xc4W7NmzXT48GG1atXKY/2RI0d06aWXmhSVd/nf+/hDhgzxeE0rROXEkAwfkZ2d7e75ZrfblZ2dLamoa/0333xjZmhe5aWXXtKIESOUmpqqY8eOKScnx2NBkaSkJD3xxBNasmSJDh48qIMHD2rJkiUaNmyYXnrpJctfM4fDcd6FWxaVE0MyfETbtm31xhtvqEePHoqLi1P79u31yiuvaNq0aZo8ebIOHjxodohe4a+zjPz
1mzwdbTyVdJ2K/xT89bWVr1leXp727NlT4uOjtm3bpujoaAUHB5sQGSoSzac+IiEhQVu2bFGPHj00cuRI9enTR2+++ab+/PNPTZkyxezwvMbq1avNDsEncJ3Or6CgQF27dlVqaqq6dOniXr99+3Z16NBB6enpJMVKiErRRx04cEBpaWm69NJL1bZtW7PD8SrHjx/XnDlzPMbfPfjgg+5OEyjCdTq/u+++W2FhYXrzzTfd60aNGqXNmzdrxYoVJkaGikJS9CEpKSlKSUkpcVD63LlzTYrKu2zcuFE33HCDgoKC3N/uf/jhB/3xxx/68ssv1bFjR5Mj9A5cp9JZtmyZBg0apN9++01VqlSR0+lUdHS0XnnlFd19991mh4cKQFL0EePHj9eECRPUuXPnEsffffLJJyZF5l2Ke+UmJye7Z2Y5c+aMBg8erL1799IpyYXrVDqFhYVq2LChZsyYoVtvvVWrV6/WHXfcoczMTAUEBJgdHioASdFHREZGavLkyerfv7/ZoXi1atWq6T//+c9Z4xS3b9+uzp07Ky8vz6TIvAvXqfSefvpp7du3Tx999JEeeOABBQYG6u233zY7LFQQhmT4iIKCAnXr1s3sMLye3W5Xenr6WeszMjIUEhJiQkTeietUegMHDtTy5ct16NAhffTRR8xDXMmRFH3E4MGD9f7775sdhtfr16+fHnzwQS1evFgZGRnKyMjQokWLNHjwYMXHx5sdntfgOpVemzZt1LJlS913332KjIzUFVdcYXZIqEAMyfBiiYmJ7n87HA7NmjVLX3/9tdq2bauqVat6bMuwjCKvvPKKbDabBgwYoDNnzkiSqlatqkcffZTp8P6C63RhBgwYoOHDh+uFF14wOxRUMO4perFrrrmmVNvZbDatWrWqgqPxLcUDryXpkksuUfXq1U2OyDtxnUonOztbb7zxhoYMGaKIiAizw0EFIikCAODCPUUAAFxIigAAuJAUAQBwISn6oPz8fI0bN075+flmh+LVuE6lw3UqHa6TNdDRxgfl5OQoNDRUJ06ckN1uNzscr8V1Kh2uU+lwnayBShEAABeSIgAALsxocx4Oh0O//vqrQkJCznoyhVlycnI8/hcl4zqVDtepdLzxOjmdTp08eVL169eXn1/F1DinT59WQUFBuRwrICBAQUFB5XKsisI9xfM4ePCgoqKizA4DAM4pIyNDDRs2LPfjnj59Wo0bN1ZmZma5HC8iIkL79u3z6sRIpXgePDEA5e2ZpOlmh+ATfs/83ewQvF5B/mnNm/FChf2dKigoUGZmptLT08vcuSgnJ0eNGjVSQUEBSdGXeUuTKSqPwKBqZofgEwIC/zA7BJ9R0X+ngkNCFFzGxOvwkUZJOtoAAOBCpQgAMOR0OlXW7ie+0n2FpAgAMOR0/ZT1GL6A5lMAAFyoFAEAhhzOoqWsx/AFJEUAgCEr3VOk+RQAABcqRQCAIYfTWeZxhr4yTpGkCAAwRPMpAAAWRKUIADBkpUqRpAgAMMQ9RQAAXKxUKXJPEQAAFypFAIAhK819SlIEABiy0jRvNJ8CAOBCpQgAMFYOHW3kIx1tSIoAAENWGpJB8ykAAC5UigAAQ1Yap0hSBAAYslJSpPkUAAAXKkUAgCErdbQhKQIADFmp+ZSkCAAwZKVp3rinCACAC5UiAMAQc5/6iJ49e2rYsGFmhwEAlZpT/72v+LcXsz9EKfl0UgQAoDzRfAoAMGSl3qc+Xyk6HA6NGDFCtWvXVkREhMaNG+d+7/jx4xo8eLDq1asnu92ua6+9Vlu2bDEvWADwQcXjFMu6+AKfT4rz589XjRo1tGHDBk2ePFkTJkzQV199JUm66667dOTIEa1YsUJpaWnq2LGjrrvuOmVnZ5/zePn5+crJyfFYAADW4PPNp23bttXYsWMlSU2bNtWbb76plJQUVatWTd9//72OHDmiwMBASdIrr7yiTz/9VEuWLNHDDz9c4vGSkpI0fvz4ixY/AHg7mk99SNu2bT1
eR0ZG6siRI9qyZYtOnTqlOnXqKDg42L3s27dPe/bsOefxRo0apRMnTriXjIyMiv4IAODVrNR86vOVYtWqVT1e22w2ORwOnTp1SpGRkUpNTT1rn5o1a57zeIGBge7KEgBgLT6fFM+lY8eOyszMVJUqVRQTE2N2OADgu8qh+VQ+Uin6fPPpucTFxSk2NlZ9+/bVl19+qf3792vdunV69tlntXHjRrPDAwCf4SynH19QaStFm82m5cuX69lnn1VCQoKysrIUERGhq6++WuHh4WaHBwA+w0rTvPl0UizpfuGnn37q/ndISIimTZumadOmXbygAAA+y6eTIgCg4llpSAZJEQBgyEpJsdJ2tAEA4EJRKQIADJXH4HsG7wMAKgWaTwEAsCAqRQCAIStViiRFAIAhK91TpPkUAAAXkiIAwJCZc59Onz5dMTExCgoKUteuXfX9998bbj916lQ1b95c1apVU1RUlIYPH67Tp0+X+nwkRQCAoeK5T8u6XKjFixcrMTFRY8eO1aZNm9SuXTv17t1bR44cKXH7999/XyNHjtTYsWO1Y8cOzZkzR4sXL9Y//vGPUp+TpAgAMFTc0aasy4WaMmWKHnroISUkJKhly5aaMWOGqlevrrlz55a4/bp163TllVfq3nvvVUxMjK6//nrFx8eft7r8K5IiAOCiycnJ8Vjy8/NL3K6goEBpaWmKi4tzr/Pz81NcXJzWr19f4j7dunVTWlqaOwnu3btXy5cv10033VTq+Oh9CgAwVJ5DMqKiojzWjx07VuPGjTtr+6NHj6qwsPCsR/2Fh4fr559/LvEc9957r44ePaqrrrpKTqdTZ86c0SOPPHJBzackRQCAIWc5DMkoTooZGRmy2+3u9YGBgWU67l+lpqZq4sSJeuutt9S1a1f98ssvevLJJ/X8889r9OjRpToGSREAcNHY7XaPpHgudevWlb+/vw4fPuyx/vDhw4qIiChxn9GjR6t///4aPHiwJKlNmzbKzc3Vww8/rGeffVZ+fue/Y8g9RQCAITM62gQEBKhTp05KSUlxr3M4HEpJSVFsbGyJ++Tl5Z2V+Pz9/d2foTSoFAEAhpwq+zRtf2fvxMREDRw4UJ07d1aXLl00depU5ebmKiEhQZI0YMAANWjQQElJSZKkPn36aMqUKerQoYO7+XT06NHq06ePOzmeD0kRAOCV+vXrp6ysLI0ZM0aZmZlq3769Vq5c6e58k56e7lEZPvfcc7LZbHruued06NAh1atXT3369NGLL75Y6nPanL4yS6tJcnJyFBoaanYYqETGvFbyGCt4yv4t2+wQvF5B/mnNev05nThxolT36S5U8d+/L9LSVCM4uEzHyj11Sr07daqwWMsLlSIAwFBZpmn76zF8AR1tAABwoVIEABj6u3OX/u8xfAFJEQBgiIcMAwDgYqWkyD1FAABcqBQBAIYc5TD3aVn3v1hIigAAQzSfAgBgQVSKAABDVqoUSYrARVa3QR2zQ/AJu9N2mx2C1/uzoOSn1pc3K91TpPkUAAAXKkUAgCErzX1KUgQAGHI6i5ayHsMX0HwKAIALlSIAwJCzHDra0PsUAFApMCQDAAAXhmQAAGBBVIoAAEM0nwIA4GKlpEjzKQAALlSKAABDVupoQ1IEABiy0jRvNJ8CAOBCpQgAMGSluU9JigAAQ9xTBADAxamyD6nwjZTIPUUAANyoFAEAhmg+BQDAhRltAACwICpFAIAhK1WKJEUAgDELDVSk+RQAABcqRQCAIafDKaejjM2nZdz/YiEpAgCMlUPrqa+M3qf5FAAAFypFAIAhep8CAOBCUgQAwMVKSZF7igAAuFTapNizZ08NGzbM7DAAwOcVD8ko6+ILKm3z6ccff6yqVauaHQYA+DwrNZ9W2qRYu3Zts0MAAPgYSzSfvvXWW2ratKmCgoIUHh6uO++809zgAMCHFFeKZV18QaWtFItt3LhRTzzxhN59911169ZN2dnZ+ve//33O7fPz85Wfn+9+nZO
TczHCBADvZaEJwSt9UkxPT1eNGjV08803KyQkRNHR0erQocM5t09KStL48eMvYoQAAG9RaZtPi/Xq1UvR0dFq0qSJ+vfvr/fee095eXnn3H7UqFE6ceKEe8nIyLiI0QKA9ykuFMu6+IJKnxRDQkK0adMmLVy4UJGRkRozZozatWun48ePl7h9YGCg7Ha7xwIAVuZ0lsOQDB/JipU+KUpSlSpVFBcXp8mTJ+vHH3/U/v37tWrVKrPDAgB4mUp/T/Hzzz/X3r17dfXVV6tWrVpavny5HA6HmjdvbnZoAOATGKdYidSsWVMff/yxxo0bp9OnT6tp06ZauHChWrVqZXZoAOATSIqVQGpqaon/BgBcGCslRUvcUwQAoDQqbaUIACgfVqoUSYoAAGMOSWV9yoWjXCKpcDSfAgDgQqUIADBE8ykAAC4Wmg+c5lMAAIpRKQIADNF8CgCAi5WSIs2nAAC4UCkCAAwVP/6prMfwBSRFAICxcmg+9ZXupyRFAIAh7ikCAOAFpk+frpiYGAUFBalr1676/vvvDbc/fvy4hg4dqsjISAUGBqpZs2Zavnx5qc9HpQgAMGRWpbh48WIlJiZqxowZ6tq1q6ZOnarevXtr586dCgsLO2v7goIC9erVS2FhYVqyZIkaNGigAwcOqGbNmqU+J0kRAGDMpCltpkyZooceekgJCQmSpBkzZmjZsmWaO3euRo4cedb2c+fOVXZ2ttatW6eqVatKkmJiYi7onDSfAgAumpycHI8lPz+/xO0KCgqUlpamuLg49zo/Pz/FxcVp/fr1Je6zdOlSxcbGaujQoQoPD1fr1q01ceJEFRYWljo+kiIAwJDTUT6LJEVFRSk0NNS9JCUllXjOo0ePqrCwUOHh4R7rw8PDlZmZWeI+e/fu1ZIlS1RYWKjly5dr9OjRevXVV/XCCy+U+rPSfAoAMORUOdxTVNH+GRkZstvt7vWBgYFlOu5fORwOhYWFadasWfL391enTp106NAhvfzyyxo7dmypjkFSBABcNHa73SMpnkvdunXl7++vw4cPe6w/fPiwIiIiStwnMjJSVatWlb+/v3vdZZddpszMTBUUFCggIOC856X5FABgqLj3aVmXCxEQEKBOnTopJSXFvc7hcCglJUWxsbEl7nPllVfql19+kcPhcK/btWuXIiMjS5UQJZIiAOA8zEiKkpSYmKjk5GTNnz9fO3bs0KOPPqrc3Fx3b9QBAwZo1KhR7u0fffRRZWdn68knn9SuXbu0bNkyTZw4UUOHDi31OWk+BQB4pX79+ikrK0tjxoxRZmam2rdvr5UrV7o736Snp8vP77+1XVRUlL744gsNHz5cbdu2VYMGDfTkk0/qmWeeKfU5SYoAAENmTvP2+OOP6/HHHy/xvdTU1LPWxcbG6rvvvvtb55JIigCA8+ApGQAAFDNpRhsz0NEGAAAXKkUAgCErPTqKpAgAMGSh1lOaTwEAKEalCFxkzVs0NjsEn/D+lLlmh+D1zpz586Kch+ZTAABcrDQkg+ZTAABcqBQBAIZoPgUAwKWo92lZk2I5BVPBaD4FAMCFShEAYIjmUwAAXEiKAAAUcziLlrIewwdwTxEAABcqRQCAIafKYe7Tcomk4pEUAQDGyuGeoq+MyaD5FAAAFypFAIAhep8CAODChOAAAFgQlSIAwBDNpwAAuFgpKdJ8CgCAC5UiAMBY0bOjyn4MH0BSBAAYslLzKUkRAGDI6ShaynoMX8A9RQAAXKgUAQCGaD4FAMDFSkmR5lMAAFyoFAEAhqxUKZIUAQCGrJQUaT4FAMCFShEAYMhKj44iKQIADNF8CgCABVEpAgDOoxwmBJdvVIokRQCAIQs9JIOkCAAwVpQUy3pPsZyCqWCWuKe4cuVKXXXVVapZs6bq1Kmjm2++WXv27DE7LACAl7FEUszNzVViYqI2btyolJQU+fn56bbbbpPDcfazTPLz85WTk+OxAICVFQ/JKOviCyzRfHrHHXd4vJ47d67q1au
n7du3q3Xr1h7vJSUlafz48RczPADwagzJqGR2796t+Ph4NWnSRHa7XTExMZKk9PT0s7YdNWqUTpw44V4yMjIucrQAALNYolLs06ePoqOjlZycrPr168vhcKh169YqKCg4a9vAwEAFBgaaECUAeCcrVYqVPikeO3ZMO3fuVHJysrp37y5JWrt2rclRAYAPKYek6CvdTyt9UqxVq5bq1KmjWbNmKTIyUunp6Ro5cqTZYQEAvFClv6fo5+enRYsWKS0tTa1bt9bw4cP18ssvmx0WAPiO4tH7ZV18QKWvFCUpLi5O27dv91jnK+3bAGA2Kz0lo9JXigAAlJYlKkUAwN/H3KcAALgwJAMAABcrJUXuKQIA4EKlCAAwZKVKkaQIADDEkAwAACyIShEAYIjmUwAA3MpjmjbfSIo0nwIA4EKlCAAwRPMpAAAuVprmjeZTAABcqBQBAIasNE6RpAgAMMQ9RQAAXKyUFLmnCACAC0kRAGCouFIs6/J3TJ8+XTExMQoKClLXrl31/fffl2q/RYsWyWazqW/fvhd0PpIiAMBQ0ZCMsibFCz/v4sWLlZiYqLFjx2rTpk1q166devfurSNHjhjut3//fj399NPq3r37BZ+TpAgA8EpTpkzRQw89pISEBLVs2VIzZsxQ9erVNXfu3HPuU1hYqPvuu0/jx49XkyZNLvicJEUAgKHiIRllXSQpJyfHY8nPzy/xnAUFBUpLS1NcXJx7nZ+fn+Li4rR+/fpzxjphwgSFhYXpwQcf/FuflaQIADBWPKVNWRdJUVFRCg0NdS9JSUklnvLo0aMqLCxUeHi4x/rw8HBlZmaWuM/atWs1Z84cJScn/+2PypAMAMBFk5GRIbvd7n4dGBhYLsc9efKk+vfvr+TkZNWtW/dvH4ekCAAwVJ5zn9rtdo+keC5169aVv7+/Dh8+7LH+8OHDioiIOGv7PXv2aP/+/erTp497ncPhkCRVqVJFO3fu1CWXXHLe89J8CgAwZMaQjICAAHXq1EkpKSnudQ6HQykpKYqNjT1r+xYtWuinn37S5s2b3cstt9yia665Rps3b1ZUVFSpzkulCADwSomJiRo4cKA6d+6sLl26aOrUqcrNzVVCQoIkacCAAWrQoIGSkpIUFBSk1q1be+xfs2ZNSTprvRGSIgDAWDlM8/Z32l/79eunrKwsjRkzRpmZmWrfvr1Wrlzp7nyTnp4uP7/ybfAkKQIADJn5lIzHH39cjz/+eInvpaamGu47b968Cz4fSREAYMhKE4KTFC+IzewAUAmMHjzK7BB8wvOzSx6/hv/KPXVKt3dbbnYYlQpJEQBgyKlyqBRFpQgAqASs1HzKOEUAAFyoFAEAxspzShsvR1IEABhyOoqWsh7DF9B8CgCAC5UiAMCQlTrakBQBAIaslBRpPgUAwIVKEQBgyEqVIkkRAGCIpAgAgIuZT8m42LinCACAC5UiAMAYM9oAAFDE6fop6zF8Ac2nAAC4UCkCAAzR+xQAAJeipFi2Gb19JSnSfAoAgAuVIgDAEM2nAAC4WCkp0nwKAIALlSIAwJCVKkWSIgDAkNPpKIfep2Xb/2IhKQIAjFlomjfuKQIA4EKlCAAwZKW5T0mKAIDzKHtHG/lIUqT5FAAAFypFAIAhhmQAAOBipSEZXt98Om7cOLVv397sMAAAFnBBSbFnz54aNmxYBYUCAPBGxc2nZV18gSWbT51OpwoLC1WliiU/PgBcECvdUyx1pTho0CCtWbNGr7/+umw2m2w2m/bv36+tW7fqxhtvVHBwsMLDw9W/f38dPXrUvd/KlSt11VVXqWbNmqpTp45uvvlm7dmzx+PYBw8eVHx8vGrXrq0aNWqoc+fO2rBhg8c27777rmJiYhQaGqp77rlHJ0+edL/ncDiUlJSkxo0bq1q1amrXrp2WLFnifj81NVU2m00rVqxQp06dFBgYqLVr117wxQIAVG6lToqvv/66YmNj9dBDD+m3337Tb7/9ppCQEF177bXq0KG
DNm7cqJUrV+rw4cO6++673fvl5uYqMTFRGzduVEpKivz8/HTbbbfJ4Si66Xrq1Cn16NFDhw4d0tKlS7VlyxaNGDHC/b4k7dmzR59++qk+//xzff7551qzZo0mTZrkfj8pKUkLFizQjBkztG3bNg0fPlz333+/1qxZ4/EZRo4cqUmTJmnHjh1q27ZtiZ8zPz9fOTk5HgsAWBnNpyUIDQ1VQECAqlevroiICEnSCy+8oA4dOmjixInu7ebOnauoqCjt2rVLzZo10x133OFxnLlz56pevXravn27Wrdurffff19ZWVn64YcfVLt2bUnSpZde6rGPw+HQvHnzFBISIknq37+/UlJS9OKLLyo/P18TJ07U119/rdjYWElSkyZNtHbtWs2cOVM9evRwH2fChAnq1auX4edMSkrS+PHjS3tZAKDyY+7T0tmyZYtWr16t4OBg99KiRQtJcjeR7t69W/Hx8WrSpInsdrtiYmIkSenp6ZKkzZs3q0OHDu6EWJKYmBh3QpSkyMhIHTlyRJL0yy+/KC8vT7169fKIY8GCBWc103bu3Pm8n2nUqFE6ceKEe8nIyCj9BQGASqhokjdHGRffSIpl6mly6tQp9enTRy+99NJZ70VGRkqS+vTpo+joaCUnJ6t+/fpyOBxq3bq1CgoKJEnVqlU773mqVq3q8dpms3k0v0rSsmXL1KBBA4/tAgMDPV7XqFHjvOcKDAw8az8AgDVcUFIMCAhQYWGh+3XHjh310UcfKSYmpsSenMeOHdPOnTuVnJys7t27S9JZHVzatm2r2bNnKzs727BaPJeWLVsqMDBQ6enpHk2lAIDyQe/Tc4iJidGGDRu0f/9+HT16VEOHDlV2drbi4+P1ww8/aM+ePfriiy+UkJCgwsJC1apVS3Xq1NGsWbP0yy+/aNWqVUpMTPQ4Znx8vCIiItS3b199++232rt3rz766COtX7++VDGFhITo6aef1vDhwzV//nzt2bNHmzZt0htvvKH58+dfyMcDAJTASh1tLigpPv300/L391fLli1Vr149FRQU6Ntvv1VhYaGuv/56tWnTRsOGDVPNmjXl5+cnPz8/LVq0SGlpaWrdurWGDx+ul19+2eOYAQEB+vLLLxUWFqabbrpJbdq00aRJk+Tv71/quJ5//nmNHj1aSUlJuuyyy3TDDTdo2bJlaty48YV8PACAxdmcvpK+TZKTk6PQ0FDXK5upsaBy6NLlJrND8AnPz04yOwSvl3vqlG7v1k0nTpyQ3W4v9+MX//278so7VKVK1fPvYODMmT/17bcfVVis5YUpXQAAhpgQHAAAC6JSBAAYslLvU5IiAMCQlZIizacAALhQKQIAjFlo7lOSIgDAkNP1U9Zj+AKSIgDAEEMyAACwICpFAIAhK/U+JSkCAAxZKSnSfAoAgAuVIgDAkJUqRZIiAOA8yt77VKL3KQAAPoVKEQBgiOZTAACKWWiaN5pPAQBwoVIEABhyquxzl/pGnUhSBACcB/cUAQBwYUJwAAAsiKQIADBU3Hxa1uXvmD59umJiYhQUFKSuXbvq+++/P+e2ycnJ6t69u2rVqqVatWopLi7OcPuSkBQBAIbMSoqLFy9WYmKixo4dq02bNqldu3bq3bu3jhw5UuL2qampio+P1+rVq7V+/XpFRUXp+uuv16FDh0p9TpIiAMArTZkyRQ899JASEhLUsmVLzZgxQ9WrV9fcuXNL3P69997TY489pvbt26tFixaaPXu2HA6HUlJSSn1OkiIAwFB5Voo5OTkeS35+fonnLCgoUFpamuLi4tzr/Pz8FBcXp/Xr15cq7ry8PP3555+qXbt2qT8rSREAYKg8k2JUVJRCQ0PdS1JSUonnPHr0qAoLCxUeHu6xPjw8XJmZmaWK+5lnnlH9+vU9Euv5MCQDAHDRZGRkyG63u18HBgZWyHkmTZqkRYsWKTU1VUFBQaXej6QIADDmdBQtZT2GJLvd7pEUz6Vu3bry9/fX4cOHPdYfPnxYERE
Rhvu+8sormjRpkr7++mu1bdv2gsKk+RQAYMhZTj8XIiAgQJ06dfLoJFPcaSY2Nvac+02ePFnPP/+8Vq5cqc6dO1/wZ6VSLDWbbDab2UF4NV+ZscJsP/30jdkh+ITr27QxOwSvl5OTY3YIFSoxMVEDBw5U586d1aVLF02dOlW5ublKSEiQJA0YMEANGjRw35d86aWXNGbMGL3//vuKiYlx33sMDg5WcHBwqc5JUgQAGDJr7tN+/fopKytLY8aMUWZmptq3b6+VK1e6O9+kp6fLz++/DZ5vv/22CgoKdOedd3ocZ+zYsRo3blypzmlz+sosrSbJyclRaGioqBTPj0qxdKpVCzE7BJ+Ql1e5q6DyUPz36cSJE6W6T/d3j9+ixRXy9y9bDVVYeEY///xdhcVaXqgUAQCGmBAcAAALolIEABjieYoAALhYKSnSfAoAgAuVIgDAkJUqRZIiAMCYU1JZk5pv5ESaTwEAKEalCAAw5JRDTpVt8hKnfGOcIkkRAGDISvcUaT4FAMCFShEAcB5lrxR9pacNSREAYMhKzackRQCAoaIJwcvY0YYJwQEA8C1UigAAQzSfAgDgYqWkSPMpAAAuVIoAAGNOZznMfeoblSJJEQBgyOn6KesxfAHNpwAAuFApAgAMWWmcIkkRAGDISr1PSYoAAENWSorcUwQAwIVKEQBgyEqVIkkRAGDISkmR5lMAAFyoFAEAhooqxbINqfCVSpGkCAAwZqFp3iq8+dRms5W4LFq0yL1NYWGhXnvtNbVp00ZBQUGqVauWbrzxRn377bcexyosLNSkSZPUokULVatWTbVr11bXrl01e/bsiv4YAAALqJBK8ffff1fVqlUVHBwsSXrnnXd0ww03eGxTs2ZNSUUl9T333KOvv/5aL7/8sq677jrl5ORo+vTp6tmzpz788EP17dtXkjR+/HjNnDlTb775pjp37qycnBxt3LhRv//+u/u4v/76q8LCwlSlCkUwAJQHK819Wm6Z48yZM/riiy80b948ffbZZ9qwYYPatWsnqSgBRkRElLjfBx98oCVLlmjp0qXq06ePe/2sWbN07NgxDR48WL169VKNGjW0dOlSPfbYY7rrrrvc2xWfo1hycrLefvtt3X///Ro4cKDatGlTXh8RACyJ3qcX4KefftJTTz2lhg0basCAAapXr55Wr159VrI6l/fff1/NmjXzSIjFnnrqKR07dkxfffWVJCkiIkKrVq1SVlbWOY/3zDPP6PXXX9eOHTvUsWNHdezYUdOmTTPc56/y8/OVk5PjsQAArOFvJcVjx47p9ddfV8eOHdW5c2ft3btXb731ln777Te99dZbio2N9dg+Pj5ewcHBHkt6erokadeuXbrssstKPE/x+l27dkmSpkyZoqysLEVERKht27Z65JFHtGLFCo99goKC1K9fPy1btkyHDh3SgAEDNG/ePDVo0EB9+/bVJ598ojNnzpzzsyUlJSk0NNS9REVF/Z1LBACVRtGE4GVffMHfSopvvPGGhg0bpuDgYP3yyy/65JNPdPvttysgIKDE7V977TVt3rzZY6lfv777/dKW1S1bttTWrVv13Xff6YEHHtCRI0fUp08fDR48uMTtw8LCNGzYMG3atEn/+te/tH79et1+++3aunXrOc8xatQonThxwr1kZGSUKjYAqKyKm0/LuviCv3VP8eGHH1aVKlW0YMECtWrVSnfccYf69++vnj17ys/v7DwbERGhSy+9tMRjNWvWTDt27CjxveL1zZo1c6/z8/PT5Zdfrssvv1zDhg3TP//5T/Xv31/PPvusGjdu7LH/yZMntWTJEr377rv65ptv1KNHDw0cOFAtW7Y852cLDAxUYGDgea8BAFgF9xTPo379+nruuee0a9curVy5UgEBAbr99tsVHR2tkSNHatu2baU+1j333KPdu3frs88+O+u9V199VXXq1FGvXr3OuX9xgsvNzZVUNGxjxYoVuvfeexUeHq5Jkybpuuuu0969e5W
SkqIBAwacs6IFAFhbmTvadOvWTTNnzlRmZqZefvllbd68We3atdNPP/3k3ub48ePKzMz0WIqT2D333KPbbrtNAwcO1Jw5c7R//379+OOPGjJkiJYuXarZs2erRo0akqQ777xTr732mjZs2KADBw4oNTVVQ4cOVbNmzdSiRQtJ0sSJExUfH6+QkBB9/fXX2rlzp5599lk1atSorB8VACzJSs2nNmcFRPrrr78qODhYdrtdNlvJT2tOSkrSyJEjJRUN55g6darmzZun3bt3KygoSLGxsRo9erSuvPJK9z7JyclauHChtm7dqhMnTigiIkLXXnutxo0bp+joaEnS/v37FRERoaCgoHL5LDk5OQoNDZVkO+dnQRFfuZFutmrVQswOwSfk5dHz+3yK/z6dOHFCdru9wo5fq1aEbLay1VBOp0O//55ZYbGWlwpJipUJSbH0SIqlQ1IsHZLi+ZEUyx/TvgAAjJXHF14f+dJMUgQAGCqaos0a07zxPEUAAFyoFAEAhoq6nlhjnCJJEQBgyEpJkeZTAABcqBQBAIbKY7iVrwzZIikCAAwVtXyWtfm0XEKpcCRFAICh8rgfyD1FAAB8DJUiAMCQlSpFkiIAwFh5JDQfSYo0nwIA4EKlCAAw5JRDUtmeEuQrc5+SFAEAhqx0T5HmUwAAXKgUAQCGrFQpkhQBAIaslBRpPgUAwIVKEQBgyEqVIkkRAGCo6AkXZRySQVIEAFQGVqoUuacIAIALlSIAwJiF5j4lKQIADJXHFG2+Ms0bzacAALhQKQIADNH7FAAAF3qfAgBgQVSK5/HfbzdOX+k8BS/nK9+YzZaTk2N2CF6v+BpdjN8pq/zekhTP4+TJk395ZY1fClSs06dPmR2CTwgNDTU7BJ9x8uTJCrleAQEBioiIUGZmZrkcLyIiQgEBAeVyrIpic1ol/f9NDodDv/76q0JCQmSzle1Gc3nJyclRVFSUMjIyZLfbzQ7Ha3GdSofrVDreeJ2cTqdOnjyp+vXry8+vYu6GnT59WgUFBeVyrICAAAUFBZXLsSoKleJ5+Pn5qWHDhmaHUSK73e41/+f0Zlyn0uE6lY63XaeKrqiDgoK8PpGVJzraAADgQlIEAMCFpOiDAgMDNXbsWAUGBpodilfjOpUO16l0uE7WQEcbAABcqBQBAHAhKQIA4EJSBADAhaQIAIALSREAABeSIgAALiRFAABcSIoAALj8P/bLSA8+8gedAAAAAElFTkSuQmCC" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "torch.save(encoder.state_dict(), \"encoder.pt\")\n", "torch.save(decoder.state_dict(), \"decoder.pt\")" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T14:50:24.773506Z", "iopub.execute_input": "2024-05-25T14:50:24.774464Z", "iopub.status.idle": "2024-05-25T14:50:24.795895Z", "shell.execute_reply.started": "2024-05-25T14:50:24.774430Z", "shell.execute_reply": "2024-05-25T14:50:24.794979Z" }, "trusted": true }, "execution_count": 48, "outputs": [] }, { "cell_type": "markdown", "source": [ "# BLEU score" ], "metadata": {} }, { "cell_type": "markdown", "source": [ "Jako że korzystaliśmy z okrojonej wersji zbioru danych, słownik nie zawiera wszystkich słów pojawiających 
się w przykładach więc do ewaluacji wykorzystujemy część przykładów z treningu" ], "metadata": {} }, { "cell_type": "code", "source": [ "import pandas as pd\n", "\n", "\n", "def filter_rows(row):\n", " return len(row[\"English\"].split(' '))\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
EnglishFinnishattribution
38027i m very serious about thisolen hyvin tosissani tastaCC-BY 2.0 (France) Attribution: tatoeba.org #2...
3803i m not tireden ole vasynytCC-BY 2.0 (France) Attribution: tatoeba.org #1...
26924i m not married eitherminakaan en ole naimisissaCC-BY 2.0 (France) Attribution: tatoeba.org #6...
32009he s sleeping like a babyhan nukkuu kuin pikkuvauvaCC-BY 2.0 (France) Attribution: tatoeba.org #2...
21339i m joking of coursese oli vitsi tietenkinCC-BY 2.0 (France) Attribution: tatoeba.org #2...
\n" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "# Reference sentences as whitespace-split token lists.\n", "test_section[\"English_tokenized\"] = test_section[\"English\"].str.split()" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:29:07.110416Z", "iopub.execute_input": "2024-05-25T15:29:07.110816Z", "iopub.status.idle": "2024-05-25T15:29:07.117378Z", "shell.execute_reply.started": "2024-05-25T15:29:07.110786Z", "shell.execute_reply": "2024-05-25T15:29:07.116136Z" }, "trusted": true }, "execution_count": 96, "outputs": [] }, { "cell_type": "code", "source": [ "test_section[\"English_tokenized\"].head()" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:29:10.203170Z", "iopub.execute_input": "2024-05-25T15:29:10.203540Z", "iopub.status.idle": "2024-05-25T15:29:10.212993Z", "shell.execute_reply.started": "2024-05-25T15:29:10.203511Z", "shell.execute_reply": "2024-05-25T15:29:10.211937Z" }, "trusted": true }, "execution_count": 97, "outputs": [ { "execution_count": 97, "output_type": "execute_result", "data": { "text/plain": "38027 [i, m, very, serious, about, this]\n3803 [i, m, not, tired]\n26924 [i, m, not, married, either]\n32009 [he, s, sleeping, like, a, baby]\n21339 [i, m, joking, of, course]\nName: English_tokenized, dtype: object" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "# translate(..., tokenized=True) output for every Finnish sentence, stored next to its reference.\n", "test_section[\"English_translated\"] = test_section[\"Finnish\"].apply(lambda sentence: translate(sentence, tokenized=True))" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:30:53.183117Z", "iopub.execute_input": "2024-05-25T15:30:53.183937Z", "iopub.status.idle": "2024-05-25T15:30:56.313012Z", "shell.execute_reply.started": "2024-05-25T15:30:53.183902Z", "shell.execute_reply": "2024-05-25T15:30:56.312202Z" }, "trusted": true }, "execution_count": 100, "outputs": [] }, { "cell_type": "code", "source": [ "test_section.head()" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:31:06.745381Z", "iopub.execute_input": 
"2024-05-25T15:31:06.746471Z", "iopub.status.idle": "2024-05-25T15:31:06.771839Z", "shell.execute_reply.started": "2024-05-25T15:31:06.746417Z", "shell.execute_reply": "2024-05-25T15:31:06.770679Z" }, "trusted": true }, "execution_count": 101, "outputs": [ { "execution_count": 101, "output_type": "execute_result", "data": { "text/plain": " English ... English_translated\n38027 i m very serious about this ... [i, m, in, french]\n3803 i m not tired ... [i, not, tired]\n26924 i m not married either ... [i, m, not, married, either]\n32009 he s sleeping like a baby ... [he, is, as, a, pianist]\n21339 i m joking of course ... [i, m, joking, of, course]\n\n[5 rows x 5 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
EnglishFinnishattributionEnglish_tokenizedEnglish_translated
38027i m very serious about thisolen hyvin tosissani tastaCC-BY 2.0 (France) Attribution: tatoeba.org #2...[i, m, very, serious, about, this][i, m, in, french]
3803i m not tireden ole vasynytCC-BY 2.0 (France) Attribution: tatoeba.org #1...[i, m, not, tired][i, not, tired]
26924i m not married eitherminakaan en ole naimisissaCC-BY 2.0 (France) Attribution: tatoeba.org #6...[i, m, not, married, either][i, m, not, married, either]
32009he s sleeping like a babyhan nukkuu kuin pikkuvauvaCC-BY 2.0 (France) Attribution: tatoeba.org #2...[he, s, sleeping, like, a, baby][he, is, as, a, pianist]
21339i m joking of coursese oli vitsi tietenkinCC-BY 2.0 (France) Attribution: tatoeba.org #2...[i, m, joking, of, course][i, m, joking, of, course]
\n
" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "# bleu_score takes one token list per candidate sentence and, for each candidate,\n", "# a list of reference token lists; every sentence here has exactly one reference.\n", "candidate_corpus = test_section[\"English_translated\"].tolist()\n", "references_corpus = [[reference] for reference in test_section[\"English_tokenized\"]]" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:43:29.441911Z", "iopub.execute_input": "2024-05-25T15:43:29.442752Z", "iopub.status.idle": "2024-05-25T15:43:29.447799Z", "shell.execute_reply.started": "2024-05-25T15:43:29.442721Z", "shell.execute_reply": "2024-05-25T15:43:29.446877Z" }, "trusted": true }, "execution_count": 118, "outputs": [] }, { "cell_type": "code", "source": [ "references_corpus[:5]" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:43:30.474463Z", "iopub.execute_input": "2024-05-25T15:43:30.475080Z", "iopub.status.idle": "2024-05-25T15:43:30.482690Z", "shell.execute_reply.started": "2024-05-25T15:43:30.475039Z", "shell.execute_reply": "2024-05-25T15:43:30.481686Z" }, "trusted": true }, "execution_count": 119, "outputs": [ { "execution_count": 119, "output_type": "execute_result", "data": { "text/plain": "[[['i', 'm', 'very', 'serious', 'about', 'this']],\n [['i', 'm', 'not', 'tired']],\n [['i', 'm', 'not', 'married', 'either']],\n [['he', 's', 'sleeping', 'like', 'a', 'baby']],\n [['i', 'm', 'joking', 'of', 'course']]]" }, "metadata": {} } ] }, { "cell_type": "code", "source": [ "from torchtext.data.metrics import bleu_score\n", "\n", "bleu_score(candidate_corpus, references_corpus)" ], "metadata": { "execution": { "iopub.status.busy": "2024-05-25T15:43:36.654035Z", "iopub.execute_input": "2024-05-25T15:43:36.654953Z", "iopub.status.idle": "2024-05-25T15:43:36.916617Z", "shell.execute_reply.started": "2024-05-25T15:43:36.654906Z", "shell.execute_reply": "2024-05-25T15:43:36.915429Z" }, "trusted": true }, "execution_count": 120, "outputs": [ { "execution_count": 120, "output_type": "execute_result", "data": { 
"text/plain": "0.5885258316993713" }, "metadata": {} } ] } ] }