959 lines
305 KiB
Plaintext
959 lines
305 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Uczenie głębokie – przetwarzanie tekstu – laboratoria\n",
|
|||
|
"# 3. RNN"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Requirement already satisfied: torch in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (2.3.0)\n",
|
|||
|
"Requirement already satisfied: torchtext in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (0.18.0)\n",
|
|||
|
"Requirement already satisfied: filelock in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (3.14.0)\n",
|
|||
|
"Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (4.11.0)\n",
|
|||
|
"Requirement already satisfied: sympy in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (1.12)\n",
|
|||
|
"Requirement already satisfied: networkx in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (3.3)\n",
|
|||
|
"Requirement already satisfied: jinja2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (3.1.4)\n",
|
|||
|
"Requirement already satisfied: fsspec in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (2024.3.1)\n",
|
|||
|
"Requirement already satisfied: mkl<=2021.4.0,>=2021.1.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (2021.4.0)\n",
|
|||
|
"Requirement already satisfied: tqdm in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torchtext) (4.66.4)\n",
|
|||
|
"Requirement already satisfied: requests in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torchtext) (2.32.2)\n",
|
|||
|
"Requirement already satisfied: numpy in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torchtext) (1.26.4)\n",
|
|||
|
"Requirement already satisfied: intel-openmp==2021.* in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from mkl<=2021.4.0,>=2021.1.1->torch) (2021.4.0)\n",
|
|||
|
"Requirement already satisfied: tbb==2021.* in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from mkl<=2021.4.0,>=2021.1.1->torch) (2021.12.0)\n",
|
|||
|
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jinja2->torch) (2.1.5)\n",
|
|||
|
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests->torchtext) (3.3.2)\n",
|
|||
|
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests->torchtext) (3.7)\n",
|
|||
|
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests->torchtext) (2.2.1)\n",
|
|||
|
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests->torchtext) (2024.2.2)\n",
|
|||
|
"Requirement already satisfied: mpmath>=0.19 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from sympy->torch) (1.3.0)\n",
|
|||
|
"Requirement already satisfied: colorama in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from tqdm->torchtext) (0.4.6)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|||
|
"Requirement already satisfied: torch in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (2.3.0)\n",
|
|||
|
"Requirement already satisfied: datasets in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (2.19.1)\n",
|
|||
|
"Requirement already satisfied: filelock in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (3.14.0)\n",
|
|||
|
"Requirement already satisfied: typing-extensions>=4.8.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (4.11.0)\n",
|
|||
|
"Requirement already satisfied: sympy in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (1.12)\n",
|
|||
|
"Requirement already satisfied: networkx in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (3.3)\n",
|
|||
|
"Requirement already satisfied: jinja2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (3.1.4)\n",
|
|||
|
"Requirement already satisfied: fsspec in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (2024.3.1)\n",
|
|||
|
"Requirement already satisfied: mkl<=2021.4.0,>=2021.1.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from torch) (2021.4.0)\n",
|
|||
|
"Requirement already satisfied: numpy>=1.17 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (1.26.4)\n",
|
|||
|
"Requirement already satisfied: pyarrow>=12.0.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (16.1.0)\n",
|
|||
|
"Requirement already satisfied: pyarrow-hotfix in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (0.6)\n",
|
|||
|
"Requirement already satisfied: dill<0.3.9,>=0.3.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (0.3.8)\n",
|
|||
|
"Requirement already satisfied: pandas in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (2.2.2)\n",
|
|||
|
"Requirement already satisfied: requests>=2.19.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (2.32.2)\n",
|
|||
|
"Requirement already satisfied: tqdm>=4.62.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (4.66.4)\n",
|
|||
|
"Requirement already satisfied: xxhash in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (3.4.1)\n",
|
|||
|
"Requirement already satisfied: multiprocess in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (0.70.16)\n",
|
|||
|
"Requirement already satisfied: aiohttp in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (3.9.5)\n",
|
|||
|
"Requirement already satisfied: huggingface-hub>=0.21.2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (0.23.1)\n",
|
|||
|
"Requirement already satisfied: packaging in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (24.0)\n",
|
|||
|
"Requirement already satisfied: pyyaml>=5.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from datasets) (6.0.1)\n",
|
|||
|
"Requirement already satisfied: aiosignal>=1.1.2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from aiohttp->datasets) (1.3.1)\n",
|
|||
|
"Requirement already satisfied: attrs>=17.3.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from aiohttp->datasets) (23.2.0)\n",
|
|||
|
"Requirement already satisfied: frozenlist>=1.1.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from aiohttp->datasets) (1.4.1)\n",
|
|||
|
"Requirement already satisfied: multidict<7.0,>=4.5 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from aiohttp->datasets) (6.0.5)\n",
|
|||
|
"Requirement already satisfied: yarl<2.0,>=1.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from aiohttp->datasets) (1.9.4)\n",
|
|||
|
"Requirement already satisfied: intel-openmp==2021.* in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from mkl<=2021.4.0,>=2021.1.1->torch) (2021.4.0)\n",
|
|||
|
"Requirement already satisfied: tbb==2021.* in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from mkl<=2021.4.0,>=2021.1.1->torch) (2021.12.0)\n",
|
|||
|
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests>=2.19.0->datasets) (3.3.2)\n",
|
|||
|
"Requirement already satisfied: idna<4,>=2.5 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests>=2.19.0->datasets) (3.7)\n",
|
|||
|
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests>=2.19.0->datasets) (2.2.1)\n",
|
|||
|
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests>=2.19.0->datasets) (2024.2.2)\n",
|
|||
|
"Requirement already satisfied: colorama in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from tqdm>=4.62.1->datasets) (0.4.6)\n",
|
|||
|
"Requirement already satisfied: MarkupSafe>=2.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jinja2->torch) (2.1.5)\n",
|
|||
|
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from pandas->datasets) (2.9.0.post0)\n",
|
|||
|
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from pandas->datasets) (2024.1)\n",
|
|||
|
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from pandas->datasets) (2024.1)\n",
|
|||
|
"Requirement already satisfied: mpmath>=0.19 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from sympy->torch) (1.3.0)\n",
|
|||
|
"Requirement already satisfied: six>=1.5 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|||
|
"Requirement already satisfied: ipywidgets in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (8.1.2)\n",
|
|||
|
"Requirement already satisfied: comm>=0.1.3 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipywidgets) (0.2.2)\n",
|
|||
|
"Requirement already satisfied: ipython>=6.1.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipywidgets) (8.24.0)\n",
|
|||
|
"Requirement already satisfied: traitlets>=4.3.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipywidgets) (5.14.3)\n",
|
|||
|
"Requirement already satisfied: widgetsnbextension~=4.0.10 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipywidgets) (4.0.10)\n",
|
|||
|
"Requirement already satisfied: jupyterlab-widgets~=3.0.10 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipywidgets) (3.0.10)\n",
|
|||
|
"Requirement already satisfied: decorator in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (5.1.1)\n",
|
|||
|
"Requirement already satisfied: jedi>=0.16 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.19.1)\n",
|
|||
|
"Requirement already satisfied: matplotlib-inline in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.1.7)\n",
|
|||
|
"Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (3.0.43)\n",
|
|||
|
"Requirement already satisfied: pygments>=2.4.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (2.18.0)\n",
|
|||
|
"Requirement already satisfied: stack-data in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.6.3)\n",
|
|||
|
"Requirement already satisfied: colorama in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=6.1.0->ipywidgets) (0.4.6)\n",
|
|||
|
"Requirement already satisfied: parso<0.9.0,>=0.8.3 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jedi>=0.16->ipython>=6.1.0->ipywidgets) (0.8.4)\n",
|
|||
|
"Requirement already satisfied: wcwidth in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=6.1.0->ipywidgets) (0.2.13)\n",
|
|||
|
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.0.1)\n",
|
|||
|
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (2.4.1)\n",
|
|||
|
"Requirement already satisfied: pure-eval in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from stack-data->ipython>=6.1.0->ipywidgets) (0.2.2)\n",
|
|||
|
"Requirement already satisfied: six>=1.12.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from asttokens>=2.1.0->stack-data->ipython>=6.1.0->ipywidgets) (1.16.0)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|||
|
"Requirement already satisfied: tdqm in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (0.0.1)\n",
|
|||
|
"Requirement already satisfied: tqdm in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from tdqm) (4.66.4)\n",
|
|||
|
"Requirement already satisfied: colorama in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from tqdm->tdqm) (0.4.6)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|||
|
"Requirement already satisfied: pandas in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (2.2.2)\n",
|
|||
|
"Requirement already satisfied: numpy>=1.26.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from pandas) (1.26.4)\n",
|
|||
|
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from pandas) (2.9.0.post0)\n",
|
|||
|
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from pandas) (2024.1)\n",
|
|||
|
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from pandas) (2024.1)\n",
|
|||
|
"Requirement already satisfied: six>=1.5 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|||
|
"Requirement already satisfied: nltk in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (3.8.1)\n",
|
|||
|
"Requirement already satisfied: click in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nltk) (8.1.7)\n",
|
|||
|
"Requirement already satisfied: joblib in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nltk) (1.4.2)\n",
|
|||
|
"Requirement already satisfied: regex>=2021.8.3 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nltk) (2024.5.15)\n",
|
|||
|
"Requirement already satisfied: tqdm in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nltk) (4.66.4)\n",
|
|||
|
"Requirement already satisfied: colorama in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from click->nltk) (0.4.6)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|||
|
"Requirement already satisfied: unidecode in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (1.3.8)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]\n",
|
|||
|
" [--paths] [--json] [--debug]\n",
|
|||
|
" [subcommand]\n",
|
|||
|
"\n",
|
|||
|
"Jupyter: Interactive Computing\n",
|
|||
|
"\n",
|
|||
|
"positional arguments:\n",
|
|||
|
" subcommand the subcommand to launch\n",
|
|||
|
"\n",
|
|||
|
"options:\n",
|
|||
|
" -h, --help show this help message and exit\n",
|
|||
|
" --version show the versions of core jupyter packages and exit\n",
|
|||
|
" --config-dir show Jupyter config dir\n",
|
|||
|
" --data-dir show Jupyter data dir\n",
|
|||
|
" --runtime-dir show Jupyter runtime dir\n",
|
|||
|
" --paths show all Jupyter paths. Add --json for machine-readable\n",
|
|||
|
" format.\n",
|
|||
|
" --json output paths as machine-readable json\n",
|
|||
|
" --debug output debug information about paths\n",
|
|||
|
"\n",
|
|||
|
"Available subcommands: console dejavu events execute kernel kernelspec lab\n",
|
|||
|
"labextension labhub migrate nbconvert notebook qtconsole run server\n",
|
|||
|
"troubleshoot trust\n",
|
|||
|
"\n",
|
|||
|
"Jupyter command `jupyter-nbextension` not found.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Requirement already satisfied: ipykernel in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (6.29.4)\n",
|
|||
|
"Requirement already satisfied: jupyter in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (1.0.0)\n",
|
|||
|
"Requirement already satisfied: comm>=0.1.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (0.2.2)\n",
|
|||
|
"Requirement already satisfied: debugpy>=1.6.5 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (1.8.1)\n",
|
|||
|
"Requirement already satisfied: ipython>=7.23.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (8.24.0)\n",
|
|||
|
"Requirement already satisfied: jupyter-client>=6.1.12 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (8.6.2)\n",
|
|||
|
"Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (5.7.2)\n",
|
|||
|
"Requirement already satisfied: matplotlib-inline>=0.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (0.1.7)\n",
|
|||
|
"Requirement already satisfied: nest-asyncio in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (1.6.0)\n",
|
|||
|
"Requirement already satisfied: packaging in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (24.0)\n",
|
|||
|
"Requirement already satisfied: psutil in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (5.9.8)\n",
|
|||
|
"Requirement already satisfied: pyzmq>=24 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (26.0.3)\n",
|
|||
|
"Requirement already satisfied: tornado>=6.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (6.4)\n",
|
|||
|
"Requirement already satisfied: traitlets>=5.4.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipykernel) (5.14.3)\n",
|
|||
|
"Requirement already satisfied: notebook in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter) (7.2.0)\n",
|
|||
|
"Requirement already satisfied: qtconsole in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter) (5.5.2)\n",
|
|||
|
"Requirement already satisfied: jupyter-console in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter) (6.6.3)\n",
|
|||
|
"Requirement already satisfied: nbconvert in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter) (7.16.4)\n",
|
|||
|
"Requirement already satisfied: ipywidgets in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter) (8.1.2)\n",
|
|||
|
"Requirement already satisfied: decorator in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (5.1.1)\n",
|
|||
|
"Requirement already satisfied: jedi>=0.16 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.19.1)\n",
|
|||
|
"Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (3.0.43)\n",
|
|||
|
"Requirement already satisfied: pygments>=2.4.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (2.18.0)\n",
|
|||
|
"Requirement already satisfied: stack-data in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.6.3)\n",
|
|||
|
"Requirement already satisfied: colorama in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipython>=7.23.1->ipykernel) (0.4.6)\n",
|
|||
|
"Requirement already satisfied: python-dateutil>=2.8.2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-client>=6.1.12->ipykernel) (2.9.0.post0)\n",
|
|||
|
"Requirement already satisfied: platformdirs>=2.5 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (4.2.2)\n",
|
|||
|
"Requirement already satisfied: pywin32>=300 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel) (306)\n",
|
|||
|
"Requirement already satisfied: widgetsnbextension~=4.0.10 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipywidgets->jupyter) (4.0.10)\n",
|
|||
|
"Requirement already satisfied: jupyterlab-widgets~=3.0.10 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from ipywidgets->jupyter) (3.0.10)\n",
|
|||
|
"Requirement already satisfied: beautifulsoup4 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (4.12.3)\n",
|
|||
|
"Requirement already satisfied: bleach!=5.0.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (6.1.0)\n",
|
|||
|
"Requirement already satisfied: defusedxml in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.7.1)\n",
|
|||
|
"Requirement already satisfied: jinja2>=3.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (3.1.4)\n",
|
|||
|
"Requirement already satisfied: jupyterlab-pygments in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.3.0)\n",
|
|||
|
"Requirement already satisfied: markupsafe>=2.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (2.1.5)\n",
|
|||
|
"Requirement already satisfied: mistune<4,>=2.0.3 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (3.0.2)\n",
|
|||
|
"Requirement already satisfied: nbclient>=0.5.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (0.10.0)\n",
|
|||
|
"Requirement already satisfied: nbformat>=5.7 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (5.10.4)\n",
|
|||
|
"Requirement already satisfied: pandocfilters>=1.4.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (1.5.1)\n",
|
|||
|
"Requirement already satisfied: tinycss2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbconvert->jupyter) (1.3.0)\n",
|
|||
|
"Requirement already satisfied: jupyter-server<3,>=2.4.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from notebook->jupyter) (2.14.0)\n",
|
|||
|
"Requirement already satisfied: jupyterlab-server<3,>=2.27.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from notebook->jupyter) (2.27.2)\n",
|
|||
|
"Requirement already satisfied: jupyterlab<4.3,>=4.2.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from notebook->jupyter) (4.2.1)\n",
|
|||
|
"Requirement already satisfied: notebook-shim<0.3,>=0.2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from notebook->jupyter) (0.2.4)\n",
|
|||
|
"Requirement already satisfied: qtpy>=2.4.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from qtconsole->jupyter) (2.4.1)\n",
|
|||
|
"Requirement already satisfied: six>=1.9.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from bleach!=5.0.0->nbconvert->jupyter) (1.16.0)\n",
|
|||
|
"Requirement already satisfied: webencodings in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from bleach!=5.0.0->nbconvert->jupyter) (0.5.1)\n",
|
|||
|
"Requirement already satisfied: parso<0.9.0,>=0.8.3 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel) (0.8.4)\n",
|
|||
|
"Requirement already satisfied: anyio>=3.1.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (4.3.0)\n",
|
|||
|
"Requirement already satisfied: argon2-cffi>=21.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (23.1.0)\n",
|
|||
|
"Requirement already satisfied: jupyter-events>=0.9.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (0.10.0)\n",
|
|||
|
"Requirement already satisfied: jupyter-server-terminals>=0.4.4 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (0.5.3)\n",
|
|||
|
"Requirement already satisfied: overrides>=5.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (7.7.0)\n",
|
|||
|
"Requirement already satisfied: prometheus-client>=0.9 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (0.20.0)\n",
|
|||
|
"Requirement already satisfied: pywinpty>=2.0.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (2.0.13)\n",
|
|||
|
"Requirement already satisfied: send2trash>=1.8.2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (1.8.3)\n",
|
|||
|
"Requirement already satisfied: terminado>=0.8.3 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (0.18.1)\n",
|
|||
|
"Requirement already satisfied: websocket-client>=1.7 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-server<3,>=2.4.0->notebook->jupyter) (1.8.0)\n",
|
|||
|
"Requirement already satisfied: async-lru>=1.0.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyterlab<4.3,>=4.2.0->notebook->jupyter) (2.0.4)\n",
|
|||
|
"Requirement already satisfied: httpx>=0.25.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyterlab<4.3,>=4.2.0->notebook->jupyter) (0.27.0)\n",
|
|||
|
"Requirement already satisfied: jupyter-lsp>=2.0.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyterlab<4.3,>=4.2.0->notebook->jupyter) (2.2.5)\n",
|
|||
|
"Requirement already satisfied: babel>=2.10 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.27.1->notebook->jupyter) (2.15.0)\n",
|
|||
|
"Requirement already satisfied: json5>=0.9.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.27.1->notebook->jupyter) (0.9.25)\n",
|
|||
|
"Requirement already satisfied: jsonschema>=4.18.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.27.1->notebook->jupyter) (4.22.0)\n",
|
|||
|
"Requirement already satisfied: requests>=2.31 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyterlab-server<3,>=2.27.1->notebook->jupyter) (2.32.2)\n",
|
|||
|
"Requirement already satisfied: fastjsonschema>=2.15 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from nbformat>=5.7->nbconvert->jupyter) (2.19.1)\n",
|
|||
|
"Requirement already satisfied: wcwidth in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=7.23.1->ipykernel) (0.2.13)\n",
|
|||
|
"Requirement already satisfied: soupsieve>1.2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from beautifulsoup4->nbconvert->jupyter) (2.5)\n",
|
|||
|
"Requirement already satisfied: executing>=1.2.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel) (2.0.1)\n",
|
|||
|
"Requirement already satisfied: asttokens>=2.1.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel) (2.4.1)\n",
|
|||
|
"Requirement already satisfied: pure-eval in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from stack-data->ipython>=7.23.1->ipykernel) (0.2.2)\n",
|
|||
|
"Requirement already satisfied: idna>=2.8 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (3.7)\n",
|
|||
|
"Requirement already satisfied: sniffio>=1.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (1.3.1)\n",
|
|||
|
"Requirement already satisfied: argon2-cffi-bindings in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->notebook->jupyter) (21.2.0)\n",
|
|||
|
"Requirement already satisfied: certifi in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from httpx>=0.25.0->jupyterlab<4.3,>=4.2.0->notebook->jupyter) (2024.2.2)\n",
|
|||
|
"Requirement already satisfied: httpcore==1.* in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from httpx>=0.25.0->jupyterlab<4.3,>=4.2.0->notebook->jupyter) (1.0.5)\n",
|
|||
|
"Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from httpcore==1.*->httpx>=0.25.0->jupyterlab<4.3,>=4.2.0->notebook->jupyter) (0.14.0)\n",
|
|||
|
"Requirement already satisfied: attrs>=22.2.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook->jupyter) (23.2.0)\n",
|
|||
|
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook->jupyter) (2023.12.1)\n",
|
|||
|
"Requirement already satisfied: referencing>=0.28.4 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook->jupyter) (0.35.1)\n",
|
|||
|
"Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook->jupyter) (0.18.1)\n",
|
|||
|
"Requirement already satisfied: python-json-logger>=2.0.4 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (2.0.7)\n",
|
|||
|
"Requirement already satisfied: pyyaml>=5.3 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (6.0.1)\n",
|
|||
|
"Requirement already satisfied: rfc3339-validator in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (0.1.4)\n",
|
|||
|
"Requirement already satisfied: rfc3986-validator>=0.1.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (0.1.1)\n",
|
|||
|
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->notebook->jupyter) (3.3.2)\n",
|
|||
|
"Requirement already satisfied: urllib3<3,>=1.21.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->notebook->jupyter) (2.2.1)\n",
|
|||
|
"Requirement already satisfied: fqdn in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (1.5.1)\n",
|
|||
|
"Requirement already satisfied: isoduration in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (20.11.0)\n",
|
|||
|
"Requirement already satisfied: jsonpointer>1.13 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (2.4)\n",
|
|||
|
"Requirement already satisfied: uri-template in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (1.3.0)\n",
|
|||
|
"Requirement already satisfied: webcolors>=1.11 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (1.13)\n",
|
|||
|
"Requirement already satisfied: cffi>=1.0.1 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->notebook->jupyter) (1.16.0)\n",
|
|||
|
"Requirement already satisfied: pycparser in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi>=21.1->jupyter-server<3,>=2.4.0->notebook->jupyter) (2.22)\n",
|
|||
|
"Requirement already satisfied: arrow>=0.15.0 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (1.3.0)\n",
|
|||
|
"Requirement already satisfied: types-python-dateutil>=2.8.10 in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->notebook->jupyter) (2.9.0.20240316)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|||
|
"Requirement already satisfied: torch in c:\\users\\dominik\\desktop\\studia\\11.sem1\\en-ner-conll-2003\\.venv\\lib\\site-packages (2.3.0)\n",
|
|||
|
"Collecting torchvision\n",
|
|||
|
" Using cached torchvision-0.18.0-cp312-cp312-win_amd64.whl.metadata (6.6 kB)\n",
|
|||
|
"Collecting torchaudio\n",
|
|||
|
" Using cached torchaudio-2.3.0-cp312-cp312-win_amd64.whl.metadata (6.4 kB)\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"ERROR: Could not find a version that satisfies the requirement cudatoolkit (from versions: none)\n",
|
|||
|
"ERROR: No matching distribution found for cudatoolkit\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Collecting cuda-python\n",
|
|||
|
" Downloading cuda_python-12.5.0-cp312-cp312-win_amd64.whl.metadata (12 kB)\n",
|
|||
|
"Downloading cuda_python-12.5.0-cp312-cp312-win_amd64.whl (10.1 MB)\n",
|
|||
|
" ---------------------------------------- 0.0/10.1 MB ? eta -:--:--\n",
|
|||
|
" ---------------------------------------- 0.0/10.1 MB ? eta -:--:--\n",
|
|||
|
" ---------------------------------------- 0.0/10.1 MB 660.6 kB/s eta 0:00:16\n",
|
|||
|
" ---------------------------------------- 0.1/10.1 MB 656.4 kB/s eta 0:00:16\n",
|
|||
|
" --------------------------------------- 0.1/10.1 MB 944.1 kB/s eta 0:00:11\n",
|
|||
|
" - -------------------------------------- 0.3/10.1 MB 1.3 MB/s eta 0:00:08\n",
|
|||
|
" - -------------------------------------- 0.4/10.1 MB 1.6 MB/s eta 0:00:07\n",
|
|||
|
" -- ------------------------------------- 0.6/10.1 MB 2.3 MB/s eta 0:00:05\n",
|
|||
|
" --- ------------------------------------ 1.0/10.1 MB 3.0 MB/s eta 0:00:04\n",
|
|||
|
" ----- ---------------------------------- 1.5/10.1 MB 4.1 MB/s eta 0:00:03\n",
|
|||
|
" -------- ------------------------------- 2.1/10.1 MB 5.3 MB/s eta 0:00:02\n",
|
|||
|
" ---------- ----------------------------- 2.6/10.1 MB 5.7 MB/s eta 0:00:02\n",
|
|||
|
" ------------ --------------------------- 3.1/10.1 MB 6.5 MB/s eta 0:00:02\n",
|
|||
|
" ------------ --------------------------- 3.1/10.1 MB 6.5 MB/s eta 0:00:02\n",
|
|||
|
" ------------ --------------------------- 3.1/10.1 MB 6.5 MB/s eta 0:00:02\n",
|
|||
|
" ------------- -------------------------- 3.4/10.1 MB 5.4 MB/s eta 0:00:02\n",
|
|||
|
" ---------------- ----------------------- 4.2/10.1 MB 6.1 MB/s eta 0:00:01\n",
|
|||
|
" ---------------- ----------------------- 4.2/10.1 MB 6.1 MB/s eta 0:00:01\n",
|
|||
|
" ------------------ --------------------- 4.8/10.1 MB 6.1 MB/s eta 0:00:01\n",
|
|||
|
" -------------------- ------------------- 5.2/10.1 MB 6.6 MB/s eta 0:00:01\n",
|
|||
|
" ----------------------- ---------------- 5.8/10.1 MB 6.6 MB/s eta 0:00:01\n",
|
|||
|
" ------------------------ --------------- 6.3/10.1 MB 6.9 MB/s eta 0:00:01\n",
|
|||
|
" ------------------------ --------------- 6.3/10.1 MB 6.5 MB/s eta 0:00:01\n",
|
|||
|
" ----------------------------- ---------- 7.3/10.1 MB 7.2 MB/s eta 0:00:01\n",
|
|||
|
" ----------------------------- ---------- 7.3/10.1 MB 7.2 MB/s eta 0:00:01\n",
|
|||
|
" ------------------------------ --------- 7.7/10.1 MB 7.0 MB/s eta 0:00:01\n",
|
|||
|
" --------------------------------- ------ 8.4/10.1 MB 7.3 MB/s eta 0:00:01\n",
|
|||
|
" ----------------------------------- ---- 9.0/10.1 MB 7.3 MB/s eta 0:00:01\n",
|
|||
|
" -------------------------------------- - 9.8/10.1 MB 7.8 MB/s eta 0:00:01\n",
|
|||
|
" --------------------------------------- 10.1/10.1 MB 7.9 MB/s eta 0:00:01\n",
|
|||
|
" ---------------------------------------- 10.1/10.1 MB 7.6 MB/s eta 0:00:00\n",
|
|||
|
"Installing collected packages: cuda-python\n",
|
|||
|
"Successfully installed cuda-python-12.5.0\n",
|
|||
|
"Note: you may need to restart the kernel to use updated packages.\n",
|
|||
|
"Installed kernelspec cuda in C:\\Users\\Dominik\\AppData\\Roaming\\jupyter\\kernels\\cuda\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"%pip install torch torchtext\n",
|
|||
|
"%pip install torch datasets\n",
|
|||
|
"%pip install ipywidgets\n",
|
|||
|
"%pip install tqdm\n",
|
|||
|
"%pip install pandas\n",
|
|||
|
"%pip install nltk\n",
|
|||
|
"%pip install unidecode\n",
|
|||
|
"!jupyter nbextension enable --py widgetsnbextension\n",
|
|||
|
"%pip install ipykernel jupyter\n",
|
|||
|
"%pip install torch torchvision torchaudio\n",
|
|||
|
"%pip install cuda-python\n",
|
|||
|
"!python -m ipykernel install --user --name=cuda --display-name \"cuda-gpt\"\n",
|
|||
|
"\n",
|
|||
|
"from collections import Counter\n",
|
|||
|
"import torch\n",
|
|||
|
"from datasets import load_dataset\n",
|
|||
|
"from torchtext.vocab import vocab\n",
|
|||
|
"from tqdm import tqdm\n",
|
|||
|
"from ipywidgets import FloatProgress\n",
|
|||
|
"\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"from nltk.tokenize import word_tokenize\n",
|
|||
|
"from unidecode import unidecode"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"False"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"torch.cuda.is_available()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"wczytano dane treningowe\n",
|
|||
|
"O B-PER O O O O O O O O O B-LOC O O O O O O O O O O O B-LOC O O B-PER I-PER O O O O O O O O O O O O O O O O O O O O O O O B-LOC O O O O O O O O O O O O O B-MISC I-MISC I-MISC I-MISC O O O B-PER O O O O O B-LOC O O O O O O O O O O O O O O O O O B-MISC O O B-LOC O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O B-PER O O O B-PER I-PER O O O O O O O O O O O O O O O O O O O O O O O O B-PER O O O O O O O O B-MISC O O O O O O O O O O O O O O O O O O O O O O O O Rare Hendrix song draft sells for almost $ 17,000 . </S> LONDON 1996-08-22 </S> A rare early handwritten draft of a song by U.S. guitar legend Jimi Hendrix was sold for almost $ 17,000 on Thursday at an auction of some of the late musician 's favourite possessions . </S> A Florida restaurant paid 10,925 pounds ( $ 16,935 ) for the draft of \" Ai n't no telling \" , which Hendrix penned on a piece of London hotel stationery in late 1966 . </S> At the end of a January 1967 concert in the English city of Nottingham he threw the sheet of paper into the audience , where it was retrieved by a fan . </S> Buyers also snapped up 16 other items that were put up for auction by Hendrix 's former girlfriend Kathy Etchingham , who lived with him from 1966 to 1969 . </S> They included a black lacquer and mother of pearl inlaid box used by Hendrix to store his drugs , which an anonymous Australian purchaser bought for 5,060 pounds ( $ 7,845 ) . </S> The guitarist died of a drugs overdose in 1970 aged 27 . </S>\n",
|
|||
|
"podzielono dane treningowe na słowa\n",
|
|||
|
"['rare', 'hendrix', 'song', 'draft', 'sells', 'for', 'almost', '$', '17,000', '.', '</s>', 'london', '1996-08-22', '</s>', 'a', 'rare', 'early', 'handwritten', 'draft', 'of', 'a', 'song', 'by', 'u.s.', 'guitar', 'legend', 'jimi', 'hendrix', 'was', 'sold', 'for', 'almost', '$', '17,000', 'on', 'thursday', 'at', 'an', 'auction', 'of', 'some', 'of', 'the', 'late', 'musician', \"'s\", 'favourite', 'possessions', '.', '</s>', 'a', 'florida', 'restaurant', 'paid', '10,925', 'pounds', '(', '$', '16,935', ')', 'for', 'the', 'draft', 'of', '\"', 'ai', \"n't\", 'no', 'telling', '\"', ',', 'which', 'hendrix', 'penned', 'on', 'a', 'piece', 'of', 'london', 'hotel', 'stationery', 'in', 'late', '1966', '.', '</s>', 'at', 'the', 'end', 'of', 'a', 'january', '1967', 'concert', 'in', 'the', 'english', 'city', 'of', 'nottingham', 'he', 'threw', 'the', 'sheet', 'of', 'paper', 'into', 'the', 'audience', ',', 'where', 'it', 'was', 'retrieved', 'by', 'a', 'fan', '.', '</s>', 'buyers', 'also', 'snapped', 'up', '16', 'other', 'items', 'that', 'were', 'put', 'up', 'for', 'auction', 'by', 'hendrix', \"'s\", 'former', 'girlfriend', 'kathy', 'etchingham', ',', 'who', 'lived', 'with', 'him', 'from', '1966', 'to', '1969', '.', '</s>', 'they', 'included', 'a', 'black', 'lacquer', 'and', 'mother', 'of', 'pearl', 'inlaid', 'box', 'used', 'by', 'hendrix', 'to', 'store', 'his', 'drugs', ',', 'which', 'an', 'anonymous', 'australian', 'purchaser', 'bought', 'for', '5,060', 'pounds', '(', '$', '7,845', ')', '.', '</s>', 'the', 'guitarist', 'died', 'of', 'a', 'drugs', 'overdose', 'in', '1970', 'aged', '27', '.', '</s>']\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# odczytaj dane treningowe\n",
|
|||
|
"train = pd.read_csv('train/train.tsv', sep='\\t')\n",
|
|||
|
"train.columns = [\"y\", \"x\"]\n",
|
|||
|
"print(\"wczytano dane treningowe\")\n",
|
|||
|
"print(train[\"y\"][0], train[\"x\"][0])\n",
|
|||
|
"\n",
|
|||
|
"# podziel dane treningowe na słowa\n",
|
|||
|
"# https://www.geeksforgeeks.org/python-word-embedding-using-word2vec/\n",
|
|||
|
"slowa_train = []\n",
|
|||
|
"for tekst in train[\"x\"]:\n",
|
|||
|
" pom = []\n",
|
|||
|
" for slowo in tekst.split(\" \"):\n",
|
|||
|
" #if slowo not in (\"<\",\"/s\",\">\",\"/S\",\"``\"):\n",
|
|||
|
" pom.append(slowo.lower())\n",
|
|||
|
" slowa_train.append(pom)\n",
|
|||
|
"print(\"podzielono dane treningowe na słowa\")\n",
|
|||
|
"print(slowa_train[0])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"wczytano dane testowe dev-0\n",
|
|||
|
"CRICKET - ENGLISH COUNTY CHAMPIONSHIP SCORES . </S> LONDON 1996-08-30 </S> Result and close of play scores in English county championship matches on Friday : </S> Leicester : Leicestershire beat Somerset by an innings and 39 runs . </S> Somerset 83 and 174 ( P. Simmons 4-38 ) , Leicestershire 296 . </S> Leicestershire 22 points , Somerset 4 . </S> Chester-le-Street : Glamorgan 259 and 207 ( A. Dale 69 , H. Morris 69 ; D. Blenkiron 4-43 ) , Durham 114 ( S. Watkin 4-28 ) and 81-3 . </S> Tunbridge Wells : Nottinghamshire 214 ( P. Johnson 84 ; M. McCague 4-55 ) , Kent 108-3 . </S> London ( The Oval ) : Warwickshire 195 , Surrey 429-7 ( C. Lewis 80 not out , M. Butcher 70 , G. Kersey 63 , J. Ratcliffe 63 , D. Bicknell 55 ) . </S> Hove : Sussex 363 ( W. Athey 111 , V. Drakes 52 ; I. Austin 4-37 ) , Lancashire 197-8 ( W. Hegg 54 ) </S> Portsmouth : Middlesex 199 and 426 ( J. Pooley 111 , M. Ramprakash 108 , M. Gatting 83 ) , Hampshire 232 and 109-5 . </S> Chesterfield : Worcestershire 238 and 133-5 , Derbyshire 471 ( J. Adams 123 , T.O'Gorman 109 not out , K. Barnett 87 ; T. Moody 6-82 ) </S> Bristol : Gloucestershire 183 and 185-6 ( J. Russell 56 not out ) , Northamptonshire 190 ( K. Curran 52 ; A. Smith 5-68 ) . </S>\n",
|
|||
|
"podzielono dane treningowe na słowa\n",
|
|||
|
"['cricket', '-', 'english', 'county', 'championship', 'scores', '.', '</s>', 'london', '1996-08-30', '</s>', 'result', 'and', 'close', 'of', 'play', 'scores', 'in', 'english', 'county', 'championship', 'matches', 'on', 'friday', ':', '</s>', 'leicester', ':', 'leicestershire', 'beat', 'somerset', 'by', 'an', 'innings', 'and', '39', 'runs', '.', '</s>', 'somerset', '83', 'and', '174', '(', 'p.', 'simmons', '4-38', ')', ',', 'leicestershire', '296', '.', '</s>', 'leicestershire', '22', 'points', ',', 'somerset', '4', '.', '</s>', 'chester-le-street', ':', 'glamorgan', '259', 'and', '207', '(', 'a.', 'dale', '69', ',', 'h.', 'morris', '69', ';', 'd.', 'blenkiron', '4-43', ')', ',', 'durham', '114', '(', 's.', 'watkin', '4-28', ')', 'and', '81-3', '.', '</s>', 'tunbridge', 'wells', ':', 'nottinghamshire', '214', '(', 'p.', 'johnson', '84', ';', 'm.', 'mccague', '4-55', ')', ',', 'kent', '108-3', '.', '</s>', 'london', '(', 'the', 'oval', ')', ':', 'warwickshire', '195', ',', 'surrey', '429-7', '(', 'c.', 'lewis', '80', 'not', 'out', ',', 'm.', 'butcher', '70', ',', 'g.', 'kersey', '63', ',', 'j.', 'ratcliffe', '63', ',', 'd.', 'bicknell', '55', ')', '.', '</s>', 'hove', ':', 'sussex', '363', '(', 'w.', 'athey', '111', ',', 'v.', 'drakes', '52', ';', 'i.', 'austin', '4-37', ')', ',', 'lancashire', '197-8', '(', 'w.', 'hegg', '54', ')', '</s>', 'portsmouth', ':', 'middlesex', '199', 'and', '426', '(', 'j.', 'pooley', '111', ',', 'm.', 'ramprakash', '108', ',', 'm.', 'gatting', '83', ')', ',', 'hampshire', '232', 'and', '109-5', '.', '</s>', 'chesterfield', ':', 'worcestershire', '238', 'and', '133-5', ',', 'derbyshire', '471', '(', 'j.', 'adams', '123', ',', \"t.o'gorman\", '109', 'not', 'out', ',', 'k.', 'barnett', '87', ';', 't.', 'moody', '6-82', ')', '</s>', 'bristol', ':', 'gloucestershire', '183', 'and', '185-6', '(', 'j.', 'russell', '56', 'not', 'out', ')', ',', 'northamptonshire', '190', '(', 'k.', 'curran', '52', ';', 'a.', 'smith', '5-68', ')', '.', '</s>']\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# odczytaj dane testowe dev-0\n",
|
|||
|
"test_dev0 = pd.read_csv('dev-0/in.tsv', sep='\\t')\n",
|
|||
|
"test_dev0.columns = [\"x\"]\n",
|
|||
|
"print(\"wczytano dane testowe dev-0\")\n",
|
|||
|
"print(test_dev0[\"x\"][0])\n",
|
|||
|
"\n",
|
|||
|
"# podziel dane testowe na słowa\n",
|
|||
|
"# https://www.geeksforgeeks.org/python-word-embedding-using-word2vec/\n",
|
|||
|
"slowa_test_dev0 = []\n",
|
|||
|
"for tekst in test_dev0[\"x\"]:\n",
|
|||
|
" pom = []\n",
|
|||
|
" for slowo in tekst.split(\" \"):\n",
|
|||
|
" #if slowo not in (\"<\",\"/s\",\">\",\"/S\",\"``\"):\n",
|
|||
|
" pom.append(slowo.lower())\n",
|
|||
|
" slowa_test_dev0.append(pom)\n",
|
|||
|
"print(\"podzielono dane testowe dev-0 na słowa\")\n",
|
|||
|
"print(slowa_test_dev0[0])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"wczytano dane testowe A\n",
|
|||
|
"RUGBY UNION - CUTTITTA BACK FOR ITALY AFTER A YEAR . </S> ROME 1996-12-06 </S> Italy recalled Marcello Cuttitta </S> on Friday for their friendly against Scotland at Murrayfield more than a year after the 30-year-old wing announced he was retiring following differences over selection . </S> Cuttitta , who trainer George Coste said was certain to play on Saturday week , was named in a 21-man squad lacking only two of the team beaten 54-21 by England at Twickenham last month . </S> Stefano Bordon is out through illness and Coste said he had dropped back row Corrado Covi , who had been recalled for the England game after five years out of the national team . </S> Cuttitta announced his retirement after the 1995 World Cup , where he took issue with being dropped from the Italy side that faced England in the pool stages . </S> Coste said he had approached the player two months ago about a comeback . </S> \" He ended the World Cup on the wrong note , \" Coste said . </S> \" I thought it would be useful to have him back and he said he would be available . </S> I think now is the right time for him to return . \" </S> Squad : Javier Pertile , Paolo Vaccari , Marcello Cuttitta , Ivan Francescato , Leandro Manteri , Diego Dominguez , Francesco Mazzariol , Alessandro Troncon , Orazio Arancio , Andrea Sgorlon , Massimo Giovanelli , Carlo Checchinato , Walter Cristofoletto , Franco Properzi Curti , Carlo Orlandi , Massimo Cuttitta , Giambatista Croci , Gianluca Guidi , Nicola Mazzucato , Alessandro Moscardi , Andrea Castellani . </S>\n",
|
|||
|
"podzielono dane treningowe na słowa\n",
|
|||
|
"['rugby', 'union', '-', 'cuttitta', 'back', 'for', 'italy', 'after', 'a', 'year', '.', '</s>', 'rome', '1996-12-06', '</s>', 'italy', 'recalled', 'marcello', 'cuttitta', '</s>', 'on', 'friday', 'for', 'their', 'friendly', 'against', 'scotland', 'at', 'murrayfield', 'more', 'than', 'a', 'year', 'after', 'the', '30-year-old', 'wing', 'announced', 'he', 'was', 'retiring', 'following', 'differences', 'over', 'selection', '.', '</s>', 'cuttitta', ',', 'who', 'trainer', 'george', 'coste', 'said', 'was', 'certain', 'to', 'play', 'on', 'saturday', 'week', ',', 'was', 'named', 'in', 'a', '21-man', 'squad', 'lacking', 'only', 'two', 'of', 'the', 'team', 'beaten', '54-21', 'by', 'england', 'at', 'twickenham', 'last', 'month', '.', '</s>', 'stefano', 'bordon', 'is', 'out', 'through', 'illness', 'and', 'coste', 'said', 'he', 'had', 'dropped', 'back', 'row', 'corrado', 'covi', ',', 'who', 'had', 'been', 'recalled', 'for', 'the', 'england', 'game', 'after', 'five', 'years', 'out', 'of', 'the', 'national', 'team', '.', '</s>', 'cuttitta', 'announced', 'his', 'retirement', 'after', 'the', '1995', 'world', 'cup', ',', 'where', 'he', 'took', 'issue', 'with', 'being', 'dropped', 'from', 'the', 'italy', 'side', 'that', 'faced', 'england', 'in', 'the', 'pool', 'stages', '.', '</s>', 'coste', 'said', 'he', 'had', 'approached', 'the', 'player', 'two', 'months', 'ago', 'about', 'a', 'comeback', '.', '</s>', '\"', 'he', 'ended', 'the', 'world', 'cup', 'on', 'the', 'wrong', 'note', ',', '\"', 'coste', 'said', '.', '</s>', '\"', 'i', 'thought', 'it', 'would', 'be', 'useful', 'to', 'have', 'him', 'back', 'and', 'he', 'said', 'he', 'would', 'be', 'available', '.', '</s>', 'i', 'think', 'now', 'is', 'the', 'right', 'time', 'for', 'him', 'to', 'return', '.', '\"', '</s>', 'squad', ':', 'javier', 'pertile', ',', 'paolo', 'vaccari', ',', 'marcello', 'cuttitta', ',', 'ivan', 'francescato', ',', 'leandro', 'manteri', ',', 'diego', 'dominguez', ',', 'francesco', 'mazzariol', ',', 'alessandro', 
'troncon', ',', 'orazio', 'arancio', ',', 'andrea', 'sgorlon', ',', 'massimo', 'giovanelli', ',', 'carlo', 'checchinato', ',', 'walter', 'cristofoletto', ',', 'franco', 'properzi', 'curti', ',', 'carlo', 'orlandi', ',', 'massimo', 'cuttitta', ',', 'giambatista', 'croci', ',', 'gianluca', 'guidi', ',', 'nicola', 'mazzucato', ',', 'alessandro', 'moscardi', ',', 'andrea', 'castellani', '.', '</s>']\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# odczytaj dane testowe A\n",
|
|||
|
"test_A = pd.read_csv('test-A/in.tsv', sep='\\t')\n",
|
|||
|
"test_A.columns = [\"x\"]\n",
|
|||
|
"print(\"wczytano dane testowe A\")\n",
|
|||
|
"print(test_A[\"x\"][0])\n",
|
|||
|
"\n",
|
|||
|
"# podziel dane testowe na słowa\n",
|
|||
|
"# https://www.geeksforgeeks.org/python-word-embedding-using-word2vec/\n",
|
|||
|
"slowa_test_A = []\n",
|
|||
|
"for tekst in test_A[\"x\"]:\n",
|
|||
|
" pom = []\n",
|
|||
|
" for slowo in tekst.split(\" \"):\n",
|
|||
|
" #if slowo not in (\"<\",\"/s\",\">\",\"/S\",\"``\"):\n",
|
|||
|
" pom.append(slowo.lower())\n",
|
|||
|
" slowa_test_A.append(pom)\n",
|
|||
|
"print(\"podzielono dane testowe A na słowa\")\n",
|
|||
|
"print(slowa_test_A[0])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def build_vocab(dataset):\n",
|
|||
|
" counter = Counter()\n",
|
|||
|
" for document in dataset:\n",
|
|||
|
" counter.update(document)\n",
|
|||
|
" return vocab(counter, specials=[\"<unk>\", \"<pad>\", \"<bos>\", \"<eos>\"])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"20998\n",
|
|||
|
"['<unk>', '<pad>', '<bos>', '<eos>', 'rare', 'hendrix', 'song', 'draft', 'sells', 'for', 'almost', '$', '17,000', '.', '</s>', 'london', '1996-08-22', 'a', 'early', 'handwritten', 'of', 'by', 'u.s.', 'guitar', 'legend', 'jimi', 'was', 'sold', 'on', 'thursday', 'at', 'an', 'auction', 'some', 'the', 'late', 'musician', \"'s\", 'favourite', 'possessions', 'florida', 'restaurant', 'paid', '10,925', 'pounds', '(', '16,935', ')', '\"', 'ai', \"n't\", 'no', 'telling', ',', 'which', 'penned', 'piece', 'hotel', 'stationery', 'in', '1966', 'end', 'january', '1967', 'concert', 'english', 'city', 'nottingham', 'he', 'threw', 'sheet', 'paper', 'into', 'audience', 'where', 'it', 'retrieved', 'fan', 'buyers', 'also', 'snapped', 'up', '16', 'other', 'items', 'that', 'were', 'put', 'former', 'girlfriend', 'kathy', 'etchingham', 'who', 'lived', 'with', 'him', 'from', 'to', '1969', 'they', 'included', 'black', 'lacquer', 'and', 'mother', 'pearl', 'inlaid', 'box', 'used', 'store', 'his', 'drugs', 'anonymous', 'australian', 'purchaser', 'bought', '5,060', '7,845', 'guitarist', 'died', 'overdose', '1970', 'aged', '27', 'china', 'says', 'taiwan', 'spoils', 'atmosphere', 'talks', 'beijing', 'accused', 'taipei', 'spoiling', 'resumption', 'across', 'strait', 'visit', 'ukraine', 'taiwanese', 'vice', 'president', 'lien', 'chan', 'this', 'week', 'infuriated', 'speaking', 'only', 'hours', 'after', 'chinese', 'state', 'media', 'said', 'time', 'right', 'engage', 'political', 'foreign', 'ministry', 'spokesman', 'shen', 'guofang', 'told', 'reuters', ':', 'necessary', 'opening', 'has', 'been', 'disrupted', 'authorities', 'quoted', 'top', 'negotiator', 'tang', 'shubei', 'as', 'visiting', 'group', 'wednesday', 'rivals', 'hold', 'now', 'is', 'two', 'sides', '...', 'hostility', 'overseas', 'edition', 'people', 'daily', 'saying', 'television', 'interview', 'had', 'read', 'reports', 'comments', 'but', 'gave', 'details', 'why', 'considered', 'considers', 'renegade', 'province', 'long', 'opposed', 'all', 
'efforts', 'gain', 'greater', 'international', 'recognition', 'rival', 'island', 'should', 'take', 'practical', 'steps', 'towards', 'goal', 'consultations', 'be', 'held', 'set', 'format', 'official', 'xinhua', 'news', 'agency', 'executive', 'chairman', 'association', 'relations', 'straits', 'german', 'july', 'car', 'registrations', '14.2', 'pct', 'yr', '/', 'frankfurt', 'first-time', 'motor', 'vehicles', 'jumped', 'percent', 'year', 'year-earlier', 'period', 'federal', 'office', '356,725', 'new', 'cars', 'registered', '1996', '--', '304,850', 'passenger', '15,613', 'trucks', 'figures', 'represent', '13.6', 'increase', '2.2', 'decline', '1995', 'motor-bike', 'registration', 'rose', '32.7', 'growth', 'partly', 'due', 'increased', 'number', 'germans', 'buying', 'abroad', 'while', 'manufacturers', 'domestic', 'demand', 'weak', 'posted', 'gains', 'numbers', 'volkswagen', 'ag', 'won', '77,719', 'slightly', 'more', 'than', 'quarter', 'total', 'opel', 'together', 'general', 'motors', 'came', 'second', 'place', '49,269', '16.4', 'overall', 'figure', 'third', 'ford', '35,563', 'or', '11.7', 'seat', 'porsche', 'fewer', 'compared', 'last', '3,420', '5522', 'earlier', 'fell', '554', '643', 'greek', 'socialists', 'give', 'green', 'light', 'pm', 'elections', 'athens', 'socialist', 'party', 'bureau', 'prime', 'minister', 'costas', 'simitis', 'call', 'snap', 'its', 'secretary', 'skandalidis', 'reporters', 'going', 'make', 'announcement', 'cabinet', 'meeting', 'later', 'dimitris', 'kontogiannis', 'newsroom', '+301', '3311812-4', 'bayervb', 'sets', 'c$', '100', 'million', 'six-year', 'bond', 'following', 'announced', 'lead', 'manager', 'toronto', 'dominion', 'borrower', 'bayerische', 'vereinsbank', 'amt', 'mln', 'coupon', '6.625', 'maturity', '24.sep.02', 'type', 'straight', 'iss', 'price', '100.92', 'pay', 'date', '24.sep.96', 'full', 'fees', '1.875', 'reoffer', '99.32', 'spread', '+20', 'bp', 'moody', 'aa1', 'listing', 'lux', 'freq', '=', 's&p', 'denoms', 'k', '1-10-100', 'sale', 
'limits', 'us', 'uk', 'ca', 'neg', 'plg', 'crs', 'deflt', 'force', 'maj', 'gov', 'law', 'home
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"v = build_vocab(slowa_train)\n",
|
|||
|
"v.set_default_index(v[\"<unk>\"])\n",
|
|||
|
"itos = v.get_itos() # mapowanie indeksów na tokeny\n",
|
|||
|
"print(len(itos)) # liczba różnych tokenów w słowniku\n",
|
|||
|
"print(itos)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"{'O': 0, 'B-PER': 1, 'B-LOC': 2, 'I-PER': 3, 'B-MISC': 4, 'I-MISC': 5, 'I-LOC': 6, 'B-ORG': 7, 'I-ORG': 8}\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# slownik etykiety - kody etykiet\n",
|
|||
|
"etykieta_na_kod = {}\n",
|
|||
|
"licznik = 0\n",
|
|||
|
"for tekst in train[\"y\"]:\n",
|
|||
|
" for etykieta in tekst.split(\" \"):\n",
|
|||
|
" if etykieta not in etykieta_na_kod:\n",
|
|||
|
" etykieta_na_kod[etykieta] = licznik\n",
|
|||
|
" licznik+=1\n",
|
|||
|
"print(etykieta_na_kod)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 5, 5, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# podziel etykiety\n",
|
|||
|
"kody_etykiet_train = []\n",
|
|||
|
"for tekst in train[\"y\"]:\n",
|
|||
|
" pom = []\n",
|
|||
|
" for etykieta in tekst.split(\" \"):\n",
|
|||
|
" pom.append(etykieta_na_kod[etykieta])\n",
|
|||
|
" kody_etykiet_train.append(pom)\n",
|
|||
|
"print(kody_etykiet_train[0])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"O O B-MISC I-MISC I-MISC O O O B-LOC O O O O O O O O O B-MISC O O O O O O O B-LOC O B-ORG O B-ORG O O O O O O O O B-ORG O O O O B-PER I-PER O O O B-ORG O O O B-ORG O O O B-ORG O O O B-LOC O B-ORG O O O O B-PER I-PER O O B-PER I-PER O O B-PER I-PER O O O B-ORG O O B-PER I-PER O O O O O O B-LOC I-LOC O B-ORG O O B-PER I-PER O O B-PER I-PER O O O B-ORG O O O B-LOC O B-LOC I-LOC O O B-ORG O O B-ORG O O B-PER I-PER O O O O B-PER I-PER O O B-PER I-PER O O B-PER I-PER O O B-PER I-PER O O O O B-LOC O B-ORG O O B-PER I-PER O O B-PER I-PER O O B-PER I-PER O O O B-ORG O O B-PER I-PER O O O B-LOC O B-ORG O O O O B-PER I-PER O O B-PER I-PER O O B-PER I-PER O O O B-ORG O O O O O B-LOC O B-ORG O O O O B-ORG O O B-PER I-PER O O B-PER O O O O B-PER I-PER O O B-PER I-PER O O O B-LOC O B-ORG O O O O B-PER I-PER O O O O O B-ORG O O B-PER I-PER O O B-PER I-PER O O O O\n",
|
|||
|
"[0, 0, 4, 5, 5, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 2, 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 1, 3, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 7, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 6, 0, 7, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 7, 0, 0, 0, 2, 0, 2, 6, 0, 0, 7, 0, 0, 7, 0, 0, 1, 3, 0, 0, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 7, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 7, 0, 0, 1, 3, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 7, 0, 0, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 7, 0, 0, 1, 3, 0, 0, 1, 0, 0, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 7, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 0]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# odczytaj etykiety dev-0\n",
|
|||
|
"labels_dev0 = pd.read_csv('dev-0/expected.tsv', sep='\\t')\n",
|
|||
|
"labels_dev0.columns = [\"y\"]\n",
|
|||
|
"print(labels_dev0[\"y\"][0])\n",
|
|||
|
"\n",
|
|||
|
"# podziel etykiety\n",
|
|||
|
"kody_etykiet_dev0 = []\n",
|
|||
|
"for tekst in labels_dev0[\"y\"]:\n",
|
|||
|
" pom = []\n",
|
|||
|
" for etykieta in tekst.split(\" \"):\n",
|
|||
|
" pom.append(etykieta_na_kod[etykieta])\n",
|
|||
|
" kody_etykiet_dev0.append(pom)\n",
|
|||
|
"print(kody_etykiet_dev0[0])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def data_process(dt):\n",
|
|||
|
" # Wektoryzacja dokumentów tekstowych.\n",
|
|||
|
" return [\n",
|
|||
|
" torch.tensor(\n",
|
|||
|
" [v[\"<bos>\"]] + [v[token] for token in document] + [v[\"<eos>\"]],\n",
|
|||
|
" dtype=torch.long,\n",
|
|||
|
" )\n",
|
|||
|
" for document in dt\n",
|
|||
|
" ]\n",
|
|||
|
"\n",
|
|||
|
"def labels_process(dt):\n",
|
|||
|
" # Wektoryzacja etykiet (NER)\n",
|
|||
|
" return [torch.tensor([0] + document + [0], dtype=torch.long) for document in dt]"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"train_tokens_ids = data_process(slowa_train)\n",
|
|||
|
"test_dev0_tokens_ids = data_process(slowa_test_dev0)\n",
|
|||
|
"test_A_tokens_ids = data_process(slowa_test_A)\n",
|
|||
|
"\n",
|
|||
|
"train_labels = labels_process(kody_etykiet_train)\n",
|
|||
|
"test_dev0_labels = labels_process(kody_etykiet_dev0)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"944 199\n",
|
|||
|
"214 256\n",
|
|||
|
"229 283\n",
|
|||
|
"tensor([ 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
|
|||
|
" 14, 17, 4, 18, 19, 7, 20, 17, 6, 21, 22, 23, 24, 25,\n",
|
|||
|
" 5, 26, 27, 9, 10, 11, 12, 28, 29, 30, 31, 32, 20, 33,\n",
|
|||
|
" 20, 34, 35, 36, 37, 38, 39, 13, 14, 17, 40, 41, 42, 43,\n",
|
|||
|
" 44, 45, 11, 46, 47, 9, 34, 7, 20, 48, 49, 50, 51, 52,\n",
|
|||
|
" 48, 53, 54, 5, 55, 28, 17, 56, 20, 15, 57, 58, 59, 35,\n",
|
|||
|
" 60, 13, 14, 30, 34, 61, 20, 17, 62, 63, 64, 59, 34, 65,\n",
|
|||
|
" 66, 20, 67, 68, 69, 34, 70, 20, 71, 72, 34, 73, 53, 74,\n",
|
|||
|
" 75, 26, 76, 21, 17, 77, 13, 14, 78, 79, 80, 81, 82, 83,\n",
|
|||
|
" 84, 85, 86, 87, 81, 9, 32, 21, 5, 37, 88, 89, 90, 91,\n",
|
|||
|
" 53, 92, 93, 94, 95, 96, 60, 97, 98, 13, 14, 99, 100, 17,\n",
|
|||
|
" 101, 102, 103, 104, 20, 105, 106, 107, 108, 21, 5, 97, 109, 110,\n",
|
|||
|
" 111, 53, 54, 31, 112, 113, 114, 115, 9, 116, 44, 45, 11, 117,\n",
|
|||
|
" 47, 13, 14, 34, 118, 119, 20, 17, 111, 120, 59, 121, 122, 123,\n",
|
|||
|
" 13, 14, 3])\n",
|
|||
|
"tensor([ 2, 1949, 459, 65, 1950, 1951, 1592, 13, 14, 15,\n",
|
|||
|
" 19342, 14, 1793, 103, 1465, 20, 1952, 1592, 59, 65,\n",
|
|||
|
" 1950, 1951, 1954, 28, 947, 166, 14, 1992, 166, 1993,\n",
|
|||
|
" 1703, 1965, 21, 31, 2038, 103, 3671, 2932, 13, 14,\n",
|
|||
|
" 1965, 6226, 103, 16331, 45, 1995, 1996, 0, 47, 53,\n",
|
|||
|
" 1993, 0, 13, 14, 1993, 1055, 1330, 53, 1965, 1864,\n",
|
|||
|
" 13, 14, 17021, 166, 1991, 19322, 103, 14088, 45, 1977,\n",
|
|||
|
" 0, 1620, 53, 10801, 12466, 1620, 1962, 1958, 0, 0,\n",
|
|||
|
" 47, 53, 1956, 19326, 45, 1960, 19327, 19328, 47, 103,\n",
|
|||
|
" 16667, 13, 14, 19313, 1363, 166, 2012, 0, 45, 1995,\n",
|
|||
|
" 2752, 5725, 1962, 1967, 0, 0, 47, 53, 1985, 0,\n",
|
|||
|
" 13, 14, 15, 45, 34, 2037, 47, 166, 2020, 14779,\n",
|
|||
|
" 53, 2018, 0, 45, 2030, 2059, 5455, 620, 618, 53,\n",
|
|||
|
" 1967, 0, 1602, 53, 1963, 0, 1976, 53, 1974, 0,\n",
|
|||
|
" 1976, 53, 1958, 0, 3843, 47, 13, 14, 19318, 166,\n",
|
|||
|
" 2002, 16329, 45, 2024, 19320, 9379, 53, 2007, 2008, 1979,\n",
|
|||
|
" 1962, 2061, 10865, 0, 47, 53, 2034, 0, 45, 2024,\n",
|
|||
|
" 0, 2054, 47, 14, 6206, 166, 12584, 11568, 103, 11269,\n",
|
|||
|
" 45, 1974, 19334, 9379, 53, 1967, 19335, 1997, 53, 1967,\n",
|
|||
|
" 17052, 6226, 47, 53, 2000, 15584, 103, 0, 13, 14,\n",
|
|||
|
" 9493, 166, 2026, 10970, 103, 0, 53, 9314, 0, 45,\n",
|
|||
|
" 1974, 2717, 0, 53, 0, 6237, 620, 618, 53, 6223,\n",
|
|||
|
" 19332, 11058, 1962, 6227, 401, 0, 47, 14, 9488, 166,\n",
|
|||
|
" 1972, 19340, 103, 0, 45, 1974, 1975, 4451, 620, 618,\n",
|
|||
|
" 47, 53, 2010, 14739, 45, 6223, 6224, 1979, 1962, 1977,\n",
|
|||
|
" 4839, 1981, 47, 13, 14, 3])\n",
|
|||
|
"tensor([ 2, 6342, 769, 459, 0, 960, 9, 1681, 150, 17,\n",
|
|||
|
" 253, 13, 14, 5474, 0, 14, 1681, 3063, 0, 0,\n",
|
|||
|
" 14, 28, 947, 9, 701, 7189, 572, 2124, 30, 0,\n",
|
|||
|
" 300, 301, 17, 253, 150, 34, 14863, 6363, 371, 68,\n",
|
|||
|
" 26, 3333, 370, 3631, 608, 11618, 13, 14, 0, 53,\n",
|
|||
|
" 92, 1738, 1753, 0, 154, 26, 3388, 97, 1952, 28,\n",
|
|||
|
" 3978, 145, 53, 26, 2116, 59, 17, 0, 2099, 14403,\n",
|
|||
|
" 148, 186, 20, 34, 695, 2519, 0, 21, 1208, 30,\n",
|
|||
|
" 0, 324, 729, 13, 14, 2725, 0, 185, 618, 863,\n",
|
|||
|
" 1521, 103, 0, 154, 68, 197, 2954, 960, 2955, 0,\n",
|
|||
|
" 0, 53, 92, 197, 170, 3063, 9, 34, 1208, 2154,\n",
|
|||
|
" 150, 1824, 1053, 618, 20, 34, 457, 695, 13, 14,\n",
|
|||
|
" 0, 371, 110, 12530, 150, 34, 274, 1593, 1711, 53,\n",
|
|||
|
" 74, 68, 596, 452, 94, 1458, 2954, 96, 34, 1681,\n",
|
|||
|
" 749, 85, 2517, 1208, 59, 34, 8797, 9174, 13, 14,\n",
|
|||
|
" 0, 154, 68, 197, 4705, 34, 6392, 186, 836, 2521,\n",
|
|||
|
" 700, 17, 17097, 13, 14, 48, 68, 1240, 34, 1593,\n",
|
|||
|
" 1711, 28, 34, 4645, 3370, 53, 48, 0, 154, 13,\n",
|
|||
|
" 14, 48, 1500, 1798, 75, 693, 226, 5612, 97, 606,\n",
|
|||
|
" 95, 960, 103, 68, 154, 68, 693, 226, 1221, 13,\n",
|
|||
|
" 14, 1500, 4604, 184, 185, 34, 156, 155, 9, 95,\n",
|
|||
|
" 97, 671, 13, 48, 14, 2099, 166, 2718, 0, 53,\n",
|
|||
|
" 0, 16807, 53, 0, 0, 53, 2886, 0, 53, 0,\n",
|
|||
|
" 0, 53, 2854, 0, 53, 10959, 0, 53, 11542, 0,\n",
|
|||
|
" 53, 0, 0, 53, 2219, 0, 53, 0, 0, 53,\n",
|
|||
|
" 4036, 0, 53, 17118, 0, 53, 11460, 0, 0, 53,\n",
|
|||
|
" 4036, 0, 53, 0, 0, 53, 0, 0, 53, 9462,\n",
|
|||
|
" 0, 53, 13541, 0, 53, 11542, 0, 53, 2219, 0,\n",
|
|||
|
" 13, 14, 3])\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(len(train_tokens_ids), len(train_tokens_ids[0]))\n",
|
|||
|
"print(len(test_dev0_tokens_ids), len(test_dev0_tokens_ids[0]))\n",
|
|||
|
"print(len(test_A_tokens_ids), len(test_A_tokens_ids[0]))\n",
|
|||
|
"\n",
|
|||
|
"print(train_tokens_ids[0])\n",
|
|||
|
"print(test_dev0_tokens_ids[0])\n",
|
|||
|
"print(test_A_tokens_ids[0])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"944 199\n",
|
|||
|
"214 256\n",
|
|||
|
"tensor([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|||
|
" 2, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|||
|
" 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 5, 5, 0, 0,\n",
|
|||
|
" 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|||
|
" 0, 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|||
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0,\n",
|
|||
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,\n",
|
|||
|
" 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|||
|
" 0, 0, 0, 0, 0, 0, 0])\n",
|
|||
|
"tensor([0, 0, 0, 4, 5, 5, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0,\n",
|
|||
|
" 0, 0, 0, 2, 0, 7, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 1, 3, 0,\n",
|
|||
|
" 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 7, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 1, 3, 0,\n",
|
|||
|
" 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 7, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 2, 6, 0,\n",
|
|||
|
" 7, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 7, 0, 0, 0, 2, 0, 2, 6, 0, 0, 7, 0,\n",
|
|||
|
" 0, 7, 0, 0, 1, 3, 0, 0, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3,\n",
|
|||
|
" 0, 0, 0, 0, 2, 0, 7, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 7, 0,\n",
|
|||
|
" 0, 1, 3, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 1, 3, 0,\n",
|
|||
|
" 0, 0, 7, 0, 0, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 7, 0, 0, 1, 3, 0, 0, 1, 0,\n",
|
|||
|
" 0, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 2, 0, 7, 0, 0, 0, 0, 1, 3, 0, 0, 0,\n",
|
|||
|
" 0, 0, 7, 0, 0, 1, 3, 0, 0, 1, 3, 0, 0, 0, 0, 0])\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(len(train_labels), len(train_labels[0]))\n",
|
|||
|
"print(len(test_dev0_labels), len(test_dev0_labels[0]))\n",
|
|||
|
"\n",
|
|||
|
"print(train_labels[0])\n",
|
|||
|
"print(test_dev0_labels[0])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def get_scores(y_true, y_pred):\n",
|
|||
|
" # Funkcja zwraca precyzję, pokrycie i F1\n",
|
|||
|
" acc_score = 0\n",
|
|||
|
" tp = 0\n",
|
|||
|
" fp = 0\n",
|
|||
|
" selected_items = 0\n",
|
|||
|
" relevant_items = 0\n",
|
|||
|
"\n",
|
|||
|
" for p, t in zip(y_pred, y_true):\n",
|
|||
|
" if p == t:\n",
|
|||
|
" acc_score += 1\n",
|
|||
|
"\n",
|
|||
|
" if p > 0 and p == t:\n",
|
|||
|
" tp += 1\n",
|
|||
|
"\n",
|
|||
|
" if p > 0:\n",
|
|||
|
" selected_items += 1\n",
|
|||
|
"\n",
|
|||
|
" if t > 0:\n",
|
|||
|
" relevant_items += 1\n",
|
|||
|
"\n",
|
|||
|
" if selected_items == 0:\n",
|
|||
|
" precision = 1.0\n",
|
|||
|
" else:\n",
|
|||
|
" precision = tp / selected_items\n",
|
|||
|
"\n",
|
|||
|
" if relevant_items == 0:\n",
|
|||
|
" recall = 1.0\n",
|
|||
|
" else:\n",
|
|||
|
" recall = tp / relevant_items\n",
|
|||
|
"\n",
|
|||
|
" if precision + recall == 0.0:\n",
|
|||
|
" f1 = 0.0\n",
|
|||
|
" else:\n",
|
|||
|
" f1 = 2 * precision * recall / (precision + recall)\n",
|
|||
|
"\n",
|
|||
|
" return precision, recall, f1"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"num_tags = len(etykieta_na_kod.keys())\n",
|
|||
|
"\n",
|
|||
|
"class LSTM(torch.nn.Module):\n",
|
|||
|
"\n",
|
|||
|
" def __init__(self):\n",
|
|||
|
" super(LSTM, self).__init__()\n",
|
|||
|
" self.emb = torch.nn.Embedding(len(v.get_itos()), 100)\n",
|
|||
|
" self.rec = torch.nn.LSTM(100, 256, 1, batch_first=True)\n",
|
|||
|
" self.fc1 = torch.nn.Linear(256, num_tags)\n",
|
|||
|
"\n",
|
|||
|
" def forward(self, x):\n",
|
|||
|
" emb = torch.relu(self.emb(x))\n",
|
|||
|
" lstm_output, (h_n, c_n) = self.rec(emb)\n",
|
|||
|
" out_weights = self.fc1(lstm_output)\n",
|
|||
|
" return out_weights"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"def eval_model(dataset_tokens, dataset_labels, model):\n",
|
|||
|
" Y_true = []\n",
|
|||
|
" Y_pred = []\n",
|
|||
|
" for i in tqdm(range(len(dataset_labels))):\n",
|
|||
|
" batch_tokens = dataset_tokens[i].unsqueeze(0)\n",
|
|||
|
" tags = list(dataset_labels[i].numpy())\n",
|
|||
|
" Y_true += tags\n",
|
|||
|
"\n",
|
|||
|
" Y_batch_pred_weights = model(batch_tokens).squeeze(0)\n",
|
|||
|
" Y_batch_pred = torch.argmax(Y_batch_pred_weights, 1)\n",
|
|||
|
" Y_pred += list(Y_batch_pred.numpy())\n",
|
|||
|
"\n",
|
|||
|
" return get_scores(Y_true, Y_pred)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"lstm = LSTM()\n",
|
|||
|
"criterion = torch.nn.CrossEntropyLoss()\n",
|
|||
|
"optimizer = torch.optim.Adam(lstm.parameters())\n",
|
|||
|
"NUM_EPOCHS = 5"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" 88%|████████▊ | 832/944 [00:37<00:05, 21.94it/s]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"ename": "KeyboardInterrupt",
|
|||
|
"evalue": "",
|
|||
|
"output_type": "error",
|
|||
|
"traceback": [
|
|||
|
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|||
|
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
|||
|
"Cell \u001b[1;32mIn[23], line 13\u001b[0m\n\u001b[0;32m 10\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mzero_grad()\n\u001b[0;32m 11\u001b[0m loss \u001b[38;5;241m=\u001b[39m criterion(predicted_tags\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m0\u001b[39m), tags\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m1\u001b[39m))\n\u001b[1;32m---> 13\u001b[0m \u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 14\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mstep()\n\u001b[0;32m 16\u001b[0m lstm\u001b[38;5;241m.\u001b[39meval()\n",
|
|||
|
"File \u001b[1;32mc:\\Users\\Dominik\\Desktop\\Studia\\11.sem1\\en-ner-conll-2003\\.venv\\Lib\\site-packages\\torch\\_tensor.py:525\u001b[0m, in \u001b[0;36mTensor.backward\u001b[1;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[0;32m 515\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m has_torch_function_unary(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m 516\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m handle_torch_function(\n\u001b[0;32m 517\u001b[0m Tensor\u001b[38;5;241m.\u001b[39mbackward,\n\u001b[0;32m 518\u001b[0m (\u001b[38;5;28mself\u001b[39m,),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 523\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[0;32m 524\u001b[0m )\n\u001b[1;32m--> 525\u001b[0m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mautograd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbackward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 526\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgradient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\n\u001b[0;32m 527\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
|||
|
"File \u001b[1;32mc:\\Users\\Dominik\\Desktop\\Studia\\11.sem1\\en-ner-conll-2003\\.venv\\Lib\\site-packages\\torch\\autograd\\__init__.py:267\u001b[0m, in \u001b[0;36mbackward\u001b[1;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[0;32m 262\u001b[0m retain_graph \u001b[38;5;241m=\u001b[39m create_graph\n\u001b[0;32m 264\u001b[0m \u001b[38;5;66;03m# The reason we repeat the same comment below is that\u001b[39;00m\n\u001b[0;32m 265\u001b[0m \u001b[38;5;66;03m# some Python versions print out the first line of a multi-line function\u001b[39;00m\n\u001b[0;32m 266\u001b[0m \u001b[38;5;66;03m# calls in the traceback and some print out the last line\u001b[39;00m\n\u001b[1;32m--> 267\u001b[0m \u001b[43m_engine_run_backward\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43mgrad_tensors_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 270\u001b[0m \u001b[43m \u001b[49m\u001b[43mretain_graph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 271\u001b[0m \u001b[43m \u001b[49m\u001b[43mcreate_graph\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 272\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 273\u001b[0m \u001b[43m \u001b[49m\u001b[43mallow_unreachable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 274\u001b[0m \u001b[43m \u001b[49m\u001b[43maccumulate_grad\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 275\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
|
|||
|
"File \u001b[1;32mc:\\Users\\Dominik\\Desktop\\Studia\\11.sem1\\en-ner-conll-2003\\.venv\\Lib\\site-packages\\torch\\autograd\\graph.py:744\u001b[0m, in \u001b[0;36m_engine_run_backward\u001b[1;34m(t_outputs, *args, **kwargs)\u001b[0m\n\u001b[0;32m 742\u001b[0m unregister_hooks \u001b[38;5;241m=\u001b[39m _register_logging_hooks_on_whole_graph(t_outputs)\n\u001b[0;32m 743\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 744\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mVariable\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_execution_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_backward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Calls into the C++ engine to run the backward pass\u001b[39;49;00m\n\u001b[0;32m 745\u001b[0m \u001b[43m \u001b[49m\u001b[43mt_outputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[0;32m 746\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Calls into the C++ engine to run the backward pass\u001b[39;00m\n\u001b[0;32m 747\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 748\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attach_logging_hooks:\n",
|
|||
|
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"for i in range(NUM_EPOCHS):\n",
|
|||
|
" lstm.train()\n",
|
|||
|
" # for i in tqdm(range(500)):\n",
|
|||
|
" for i in tqdm(range(len(train_labels))):\n",
|
|||
|
" batch_tokens = train_tokens_ids[i].unsqueeze(0)\n",
|
|||
|
" tags = train_labels[i].unsqueeze(1)\n",
|
|||
|
"\n",
|
|||
|
" predicted_tags = lstm(batch_tokens)\n",
|
|||
|
"\n",
|
|||
|
" optimizer.zero_grad()\n",
|
|||
|
" loss = criterion(predicted_tags.squeeze(0), tags.squeeze(1))\n",
|
|||
|
"\n",
|
|||
|
" loss.backward()\n",
|
|||
|
" optimizer.step()\n",
|
|||
|
"\n",
|
|||
|
" lstm.eval()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"100%|██████████| 214/214 [00:00<00:00, 262.62it/s]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"(0.6558005752636625, 0.7225352112676057, 0.687552353828112)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(eval_model(test_dev0_tokens_ids, test_dev0_labels, lstm))"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"author": "Jakub Pokrywka",
|
|||
|
"email": "kubapok@wmi.amu.edu.pl",
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"lang": "pl",
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.3"
|
|||
|
},
|
|||
|
"subtitle": "11.NER RNN[ćwiczenia]",
|
|||
|
"title": "Ekstrakcja informacji",
|
|||
|
"year": "2021"
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 4
|
|||
|
}
|