diff --git a/rnn_fras.ipynb b/rnn_fras.ipynb new file mode 100644 index 0000000..320b3f0 --- /dev/null +++ b/rnn_fras.ipynb @@ -0,0 +1,1403 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Homework\n", + "\n", + "\n", + "- clone the repository https://git.wmi.amu.edu.pl/kubapok/en-ner-conll-2003\n", + "- build a sequence-labelling model based on the neural network described in the point below (you may, but do not have to, base it on this notebook).\n", + "- the network is a GRU (with any parameters) + CRF in PyTorch, using the CRF module from the previous classes\n", + "- write the predictions to dev-0/out.tsv and test-A/out.tsv\n", + "- the F-score checked with the geval tool (see the previous assignment) should be at least 0.65\n", + "- put the predictions and the generating scripts (as plain text, not a Jupyter notebook) in the repo, and post a link to your repo on MS Teams\n", + "deadline 22.06, 60 points, 100 points for the best result\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "from torchtext.vocab import Vocab\n", + "from collections import Counter\n", + "from tqdm.notebook import tqdm\n", + "import lzma\n", + "import itertools\n", + "from torchcrf import CRF" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def read_data(filename):\n", + " all_data = lzma.open(filename).read().decode('UTF-8').split('\\n')\n", + " return [line.split('\\t') for line in all_data][:-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "def data_process(dt):\n", + " return [torch.tensor([vocab['<bos>']] + [vocab[token] for token in document] + [vocab['<eos>']], dtype = torch.long) for document in dt]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def labels_process(dt):\n", + " return [ torch.tensor([0] + document + [0], dtype = torch.long) for document in dt]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def build_vocab(dataset):\n", + " counter = Counter()\n", + " for document in dataset:\n", + " counter.update(document)\n", + " return Vocab(counter, specials=['<unk>', '<pad>', '<bos>', '<eos>'])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = read_data('train/train.tsv.xz')\n", + "\n", + "tokens, ner_tags = [], []\n", + "for i in train_data:\n", + " ner_tags.append(i[0].split())\n", + " tokens.append(i[1].split())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "vocab = build_vocab(tokens)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "train_tokens_ids = data_process(tokens)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['B-LOC', 'B-MISC', 'B-ORG', 'B-PER', 'I-LOC', 'I-MISC', 'I-ORG', 'I-PER', 'O']\n" + ] + } + ], + "source": [ + "ner_tags_set = list(set(itertools.chain(*ner_tags)))\n", + "ner_tags_set.sort()\n", + "print(ner_tags_set)\n", + "train_labels = labels_process([[ner_tags_set.index(token) for token in doc] for doc in ner_tags])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + 
"metadata": {}, + "outputs": [], + "source": [ + "num_tags = max([max(x) for x in train_labels]) + 1 " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class GRU(torch.nn.Module):\n", + "\n", + " def __init__(self):\n", + " super(GRU, self).__init__()\n", + " self.emb = torch.nn.Embedding(len(vocab.itos),100)\n", + " self.dropout = torch.nn.Dropout(0.2)\n", + " self.rec = torch.nn.GRU(100, 256, 2, batch_first = True, bidirectional = True)\n", + " self.fc1 = torch.nn.Linear(2* 256 , 9)\n", + " \n", + " def forward(self, x):\n", + " emb = torch.relu(self.emb(x))\n", + " emb = self.dropout(emb)\n", + " gru_output, h_n = self.rec(emb)\n", + " out_weights = self.fc1(gru_output)\n", + " return out_weights" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "def get_scores(y_true, y_pred):\n", + " acc_score = 0\n", + " tp = 0\n", + " fp = 0\n", + " selected_items = 0\n", + " relevant_items = 0 \n", + "\n", + " for p,t in zip(y_pred, y_true):\n", + " if p == t:\n", + " acc_score +=1\n", + "\n", + " if p > 0 and p == t:\n", + " tp +=1\n", + "\n", + " if p > 0:\n", + " selected_items += 1\n", + "\n", + " if t > 0 :\n", + " relevant_items +=1\n", + " \n", + " if selected_items == 0:\n", + " precision = 1.0\n", + " else:\n", + " precision = tp / selected_items\n", + " \n", + " if relevant_items == 0:\n", + " recall = 1.0\n", + " else:\n", + " recall = tp / relevant_items\n", + " \n", + " if precision + recall == 0.0 :\n", + " f1 = 0.0\n", + " else:\n", + " f1 = 2* precision * recall / (precision + recall)\n", + "\n", + " return precision, recall, f1" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def eval_model(dataset_tokens, dataset_labels, model):\n", + " Y_true = []\n", + " Y_pred = []\n", + " for i in tqdm(range(len(dataset_labels))):\n", + " batch_tokens = dataset_tokens[i].unsqueeze(1)\n", + " tags = list(dataset_labels[i].numpy())\n", + " emissions = gru(batch_tokens).squeeze(0)\n", + " Y_pred += crf.decode(emissions)[0]\n", + " Y_true += tags\n", + " return get_scores(Y_true, Y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "gru = GRU()\n", + "crf = CRF(num_tags)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "params = list(gru.parameters()) + list(crf.parameters())\n", + "optimizer = torch.optim.Adam(params)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "NUM_EPOCHS = 20" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "criterion = torch.nn.CrossEntropyLoss()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c5da680182d74dbe8a6e6e515f39c304", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=945.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/zosia/.local/lib/python3.8/site-packages/torchcrf/__init__.py:249: UserWarning: where received a uint8 condition tensor. 
This behavior is deprecated and will be removed in a future version of PyTorch. Use a boolean condition instead. (Triggered internally at /pytorch/aten/src/ATen/native/TensorCompare.cpp:255.)\n", + " score = torch.where(mask[i].unsqueeze(1), next_score, score)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0.8601941656899232, 0.8751514345303986, 0.8676083403589915)\n", + "(0.8815602436292092, 0.8897984198549079, 0.8856601748234387)\n", + "(0.9144309250302297, 0.919752763828645, 0.9170841238373373)\n", + "(0.9361905528132853, 0.9398110097060626, 0.9379972877369673)\n", + "(0.9519541852390448, 0.9547763044748607, 0.9533631563717097)\n", + "(0.960722713444972, 0.9632376346282668, 0.961978530336279)\n", + "(0.9697570414352719, 0.9714709221947199, 0.9706132252353172)\n", + "(0.9760554565110192, 0.9779891394717963, 0.9770213412246582)\n", + "(0.9811127302761178, 0.9819703829690195, 0.9815413692723396)\n", + "(0.984655071665091, 0.9846831395763159, 0.9846691054206851)\n", + "(0.9871442343767067, 0.9875194192515452, 0.9873317911716786)\n", + "(0.9893908786272786, 0.9889114292094049, 0.9891510958201069)\n", + "(0.9911312527046112, 0.9901989196482444, 0.9906648668174991)\n", + "(0.9924332083291745, 0.9919900041332719, 0.9922115567382627)\n", + "(0.9930640069977942, 0.9924270857582653, 0.9927454442197611)\n", + "(0.9739162872556146, 0.9674801769230403, 0.9706875636048171)\n", + "(0.9848088502477955, 0.9837187094689933, 0.9842634780066597)\n", + "(0.9808100926458495, 0.9802695653413275, 0.9805397545015183)\n", + "(0.9668917478143436, 0.9694090371376854, 0.968148756174055)\n", + "(0.9793555195345366, 0.9788157938495013, 0.979085582310423)\n" + ] + } + ], + "source": [ + "for i in range(NUM_EPOCHS):\n", + " gru.train()\n", + " crf.train()\n", + " for i in tqdm(range(len(train_labels))):\n", + " batch_tokens = train_tokens_ids[i].unsqueeze(1)\n", + " tags = train_labels[i].unsqueeze(1)\n", + " emissions = gru(batch_tokens).squeeze(0)\n", + " optimizer.zero_grad()\n", + " loss = -crf(emissions,tags.squeeze(0))\n", + " loss.backward()\n", + " optimizer.step()\n", + " gru.eval()\n", + " crf.eval()\n", + " print(eval_model(train_tokens_ids, train_labels, gru))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + 
"## dev-0 i test-A" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "def predict_labels(dataset_tokens, dataset_labels, model):\n", + " print(len(dataset_tokens[0]), len(dataset_labels[0]))\n", + " Y_true = []\n", + " Y_pred = []\n", + " result = []\n", + " for i in tqdm(range(len(dataset_labels))):\n", + " batch_tokens = dataset_tokens[i].unsqueeze(1)\n", + " tags = list(dataset_labels[i].numpy())\n", + " emissions = gru(batch_tokens).squeeze(0)\n", + " tmp = crf.decode(emissions)[0]\n", + " Y_pred += tmp\n", + " result += [tmp]\n", + " Y_true += tags\n", + " print(get_scores(Y_true, Y_pred))\n", + " return result" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "with open('dev-0/in.tsv', \"r\", encoding=\"utf-8\") as f:\n", + " dev_0_data = [line.rstrip() for line in f]\n", + " \n", + "dev_0_data = [i.split() for i in dev_0_data]\n", + "dev_0_tokens_ids = data_process(dev_0_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "with open('dev-0/expected.tsv', \"r\", encoding=\"utf-8\") as f:\n", + " dev_0_labels = [line.rstrip() for line in f]\n", + " \n", + "dev_0_labels = [i.split() for i in dev_0_labels]\n", + "dev_0_labels = labels_process([[ner_tags_set.index(token) for token in doc] for doc in dev_0_labels])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "458 458\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e48f16faacc043ac8237af22f32b0af1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=215.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "(0.9501477944520237, 0.9535808009736432, 0.9518612023310112)\n" + ] + } + ], + "source": [ + "tmp = predict_labels(dev_0_tokens_ids, dev_0_labels, gru)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "r = [[ner_tags_set[i] for i in tmp2] for tmp2 in tmp]\n", + "r = [i[1:-1] for i in r]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "for doc in r:\n", + " if doc[0] != 'O':\n", + " doc[0] = 'B' + doc[0][1:]\n", + " for i in range(len(doc))[:-1]:\n", + " if doc[i] == 'O':\n", + " if doc[i + 1] != 'O':\n", + " doc[i + 1] = 'B' + doc[i + 1][1:]\n", + " elif doc[i + 1] != 'O':\n", + " if doc[i][1:] == doc[i + 1][1:]:\n", + " doc[i + 1] = 'I' + doc[i + 1][1:]\n", + " else:\n", + " doc[i + 1] = 'B' + doc[i + 1][1:]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "f = open(\"dev-0/out.tsv\", \"a\")\n", + "for i in r:\n", + " f.write(' '.join(i) + '\\n')\n", + "f.close()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9cce1860765e420f9b0bfaa23b651f58", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=215.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" 
+ ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "42e2565e95db4efb9343d93f195212d5", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "HBox(children=(FloatProgress(value=0.0, max=230.0), HTML(value='')))" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "def predict(path, model):\n", + " with open(path + '/in.tsv', \"r\", encoding=\"utf-8\") as f:\n", + " data = [line.rstrip() for line in f]\n", + " data = [i.split() for i in data]\n", + " tokens_ids = data_process(data)\n", + " \n", + " Y_true = []\n", + " Y_pred = []\n", + " result = []\n", + " for i in tqdm(range(len(tokens_ids))):\n", + " batch_tokens = tokens_ids[i].unsqueeze(1)\n", + " emissions = gru(batch_tokens).squeeze(0)\n", + " tmp = crf.decode(emissions)[0]\n", + " Y_pred += tmp\n", + " result += [tmp]\n", + " r = [[ner_tags_set[i] for i in tmp] for tmp in result]\n", + " r = [i[1:-1] for i in r]\n", + " for doc in r:\n", + " if doc[0] != 'O':\n", + " doc[0] = 'B' + doc[0][1:]\n", + " for i in range(len(doc))[:-1]:\n", + " if doc[i] == 'O':\n", + " if doc[i + 1] != 'O':\n", + " doc[i + 1] = 'B' + doc[i + 1][1:]\n", + " elif doc[i + 1] != 'O':\n", + " if doc[i][1:] == doc[i + 1][1:]:\n", + " doc[i + 1] = 'I' + doc[i + 1][1:]\n", + " else:\n", + " doc[i + 1] = 'B' + doc[i + 1][1:]\n", + " f = open(path + \"/out.tsv\", \"a\")\n", + " for i in r:\n", + " f.write(' '.join(i) + '\\n')\n", + " f.close()\n", + " return result\n", + "\n", + "result = predict('dev-0', gru)\n", + "result = predict('test-A', gru)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/rnn_fras.py b/rnn_fras.py new file mode 100644 index 0000000..a4fe179 --- /dev/null +++ b/rnn_fras.py @@ -0,0 +1,340 @@ +#!/usr/bin/env python +# coding: utf-8 + +# ## Homework +# +# +# - clone the repository https://git.wmi.amu.edu.pl/kubapok/en-ner-conll-2003 +# - build a sequence-labelling model based on the neural network described in the point below (you may, but do not have to, base it on this notebook).
+# - the network is a GRU (with any parameters) + CRF in PyTorch, using the CRF module from the previous classes +# - write the predictions to dev-0/out.tsv and test-A/out.tsv +# - the F-score checked with the geval tool (see the previous assignment) should be at least 0.65 +# - put the predictions and the generating scripts (as plain text, not a Jupyter notebook) in the repo, and post a link to your repo on MS Teams +# deadline 22.06, 60 points, 100 points for the best result +# + +# In[2]: + + +import numpy as np +import torch +from torchtext.vocab import Vocab +from collections import Counter +from tqdm.notebook import tqdm +import lzma +import itertools +from torchcrf import CRF + + +# In[3]: + + +def read_data(filename): + all_data = lzma.open(filename).read().decode('UTF-8').split('\n') + return [line.split('\t') for line in all_data][:-1] + + +# In[4]: + + +def data_process(dt): + return [torch.tensor([vocab['<bos>']] + [vocab[token] for token in document] + [vocab['<eos>']], dtype = torch.long) for document in dt] + + +# In[5]: + + +def labels_process(dt): + return [ torch.tensor([0] + document + [0], dtype = torch.long) for document in dt] + + +# In[6]: + + +def build_vocab(dataset): + counter = Counter() + for document in dataset: + counter.update(document) + return Vocab(counter, specials=['<unk>', '<pad>', '<bos>', '<eos>']) + + +# In[7]: + + +train_data = read_data('train/train.tsv.xz') + +tokens, ner_tags = [], [] +for i in train_data: + ner_tags.append(i[0].split()) + tokens.append(i[1].split()) + + +# In[8]: + + +vocab = build_vocab(tokens) + + +# In[9]: + + +train_tokens_ids = data_process(tokens) + + +# In[10]: + + +ner_tags_set = list(set(itertools.chain(*ner_tags))) +ner_tags_set.sort() +print(ner_tags_set) +train_labels = labels_process([[ner_tags_set.index(token) for token in doc] for doc in ner_tags]) + + +# In[11]: + + +num_tags = max([max(x) for x in train_labels]) + 1 + + +# In[12]: + + +class GRU(torch.nn.Module): + + def __init__(self): + super(GRU, self).__init__() + self.emb = torch.nn.Embedding(len(vocab.itos),100) + self.dropout = torch.nn.Dropout(0.2) + self.rec = torch.nn.GRU(100, 256, 2, batch_first = True, bidirectional = True) + self.fc1 = torch.nn.Linear(2* 256 , 9) + + def forward(self, x): + emb = torch.relu(self.emb(x)) + emb = self.dropout(emb) + gru_output, h_n = self.rec(emb) + out_weights = self.fc1(gru_output) + return out_weights + + +# In[13]: + + +def get_scores(y_true, y_pred): + acc_score = 0 + tp = 0 + fp = 0 + selected_items = 0 + relevant_items = 0 + + for p,t in zip(y_pred, y_true): + if p == t: + acc_score +=1 + + if p > 0 and p == t: + tp +=1 + + if p > 0: + selected_items += 1 + + if t > 0 : + relevant_items +=1 + + if selected_items == 0: + precision = 1.0 + else: + precision = tp / selected_items + + if relevant_items == 0: + recall = 1.0 + else: + recall = tp / relevant_items + + if precision + recall == 0.0 : + f1 = 0.0 + else: + f1 = 2* precision * recall / (precision + recall) + + return precision, recall, f1 + + +# In[14]: + + +def eval_model(dataset_tokens, dataset_labels, model): + Y_true = [] + Y_pred = [] + for i in tqdm(range(len(dataset_labels))): + batch_tokens = dataset_tokens[i].unsqueeze(1) + tags = list(dataset_labels[i].numpy()) + emissions = gru(batch_tokens).squeeze(0) + Y_pred += crf.decode(emissions)[0] + Y_true += tags + return get_scores(Y_true, Y_pred) + + +# In[15]: + + +gru = GRU() +crf = CRF(num_tags) + + +# In[16]: + + +params = list(gru.parameters()) + list(crf.parameters()) +optimizer = torch.optim.Adam(params) + 
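# A minimal sanity check of the pytorch-crf interface used by the training loop
# below; this is an illustrative aside, not part of the original solution, and
# the sequence length 12 is an arbitrary assumption. With the default
# batch_first=False, crf(emissions, tags) expects emissions of shape
# (seq_len, batch, num_tags) and tags of shape (seq_len, batch) and returns the
# sequence log-likelihood (hence the negation to get a loss), while
# crf.decode(emissions) returns the most likely tag-id path per batch element.
_demo_emissions = torch.randn(12, 1, num_tags)    # random emission scores
_demo_tags = torch.randint(0, num_tags, (12, 1))  # random gold tag ids (long)
_demo_loss = -crf(_demo_emissions, _demo_tags)    # scalar negative log-likelihood
_demo_path = crf.decode(_demo_emissions)[0]       # list of 12 predicted tag ids
assert len(_demo_path) == 12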
+# In[17]: + + +NUM_EPOCHS = 20 + + +# In[18]: + + +criterion = torch.nn.CrossEntropyLoss() + + +# In[19]: + + +for i in range(NUM_EPOCHS): + gru.train() + crf.train() + for i in tqdm(range(len(train_labels))): + batch_tokens = train_tokens_ids[i].unsqueeze(1) + tags = train_labels[i].unsqueeze(1) + emissions = gru(batch_tokens).squeeze(0) + optimizer.zero_grad() + loss = -crf(emissions,tags.squeeze(0)) + loss.backward() + optimizer.step() + gru.eval() + crf.eval() + print(eval_model(train_tokens_ids, train_labels, gru)) + + +# ## dev-0 i test-A + +# In[20]: + + +def predict_labels(dataset_tokens, dataset_labels, model): + print(len(dataset_tokens[0]), len(dataset_labels[0])) + Y_true = [] + Y_pred = [] + result = [] + for i in tqdm(range(len(dataset_labels))): + batch_tokens = dataset_tokens[i].unsqueeze(1) + tags = list(dataset_labels[i].numpy()) + emissions = gru(batch_tokens).squeeze(0) + tmp = crf.decode(emissions)[0] + Y_pred += tmp + result += [tmp] + Y_true += tags + print(get_scores(Y_true, Y_pred)) + return result + + +# In[21]: + + +with open('dev-0/in.tsv', "r", encoding="utf-8") as f: + dev_0_data = [line.rstrip() for line in f] + +dev_0_data = [i.split() for i in dev_0_data] +dev_0_tokens_ids = data_process(dev_0_data) + + +# In[22]: + + +with open('dev-0/expected.tsv', "r", encoding="utf-8") as f: + dev_0_labels = [line.rstrip() for line in f] + +dev_0_labels = [i.split() for i in dev_0_labels] +dev_0_labels = labels_process([[ner_tags_set.index(token) for token in doc] for doc in dev_0_labels]) + + +# In[23]: + + +tmp = predict_labels(dev_0_tokens_ids, dev_0_labels, gru) + + +# In[24]: + + +r = [[ner_tags_set[i] for i in tmp2] for tmp2 in tmp] +r = [i[1:-1] for i in r] + + +# In[25]: + + +for doc in r: + if doc[0] != 'O': + doc[0] = 'B' + doc[0][1:] + for i in range(len(doc))[:-1]: + if doc[i] == 'O': + if doc[i + 1] != 'O': + doc[i + 1] = 'B' + doc[i + 1][1:] + elif doc[i + 1] != 'O': + if doc[i][1:] == doc[i + 1][1:]: + doc[i + 1] = 'I' + doc[i + 1][1:] + else: + doc[i + 1] = 'B' + doc[i + 1][1:] + + +# In[26]: + + +f = open("dev-0/out.tsv", "a") +for i in r: + f.write(' '.join(i) + '\n') +f.close() + + +# In[27]: + + +def predict(path, model): + with open(path + '/in.tsv', "r", encoding="utf-8") as f: + data = [line.rstrip() for line in f] + data = [i.split() for i in data] + tokens_ids = data_process(data) + + Y_true = [] + Y_pred = [] + result = [] + for i in tqdm(range(len(tokens_ids))): + batch_tokens = tokens_ids[i].unsqueeze(1) + emissions = gru(batch_tokens).squeeze(0) + tmp = crf.decode(emissions)[0] + Y_pred += tmp + result += [tmp] + r = [[ner_tags_set[i] for i in tmp] for tmp in result] + r = [i[1:-1] for i in r] + for doc in r: + if doc[0] != 'O': + doc[0] = 'B' + doc[0][1:] + for i in range(len(doc))[:-1]: + if doc[i] == 'O': + if doc[i + 1] != 'O': + doc[i + 1] = 'B' + doc[i + 1][1:] + elif doc[i + 1] != 'O': + if doc[i][1:] == doc[i + 1][1:]: + doc[i + 1] = 'I' + doc[i + 1][1:] + else: + doc[i + 1] = 'B' + doc[i + 1][1:] + f = open(path + "/out.tsv", "a") + for i in r: + f.write(' '.join(i) + '\n') + f.close() + return result + +result = predict('dev-0', gru) +result = predict('test-A', gru) +
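# A small BIO sanity check - an optional aside, not part of the original
# assignment code. The tag-repair loops above ensure that an I-X tag can only
# follow a B-X or I-X tag of the same entity type, so the files written by
# predict() should come back with zero violations.
def count_bio_violations(path):
    """Count I-X tags in path/out.tsv that do not continue an entity of type X."""
    violations = 0
    with open(path + "/out.tsv", "r", encoding="utf-8") as f:
        for line in f:
            previous = 'O'
            for tag in line.split():
                # an I-X tag is only valid directly after B-X or I-X of the same type
                if tag.startswith('I-') and previous[1:] != tag[1:]:
                    violations += 1
                previous = tag
    return violations

# Example (commented out): both outputs written above should report 0.
# print(count_bio_violations('dev-0'), count_bio_violations('test-A'))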