chinese-couplets-matching/model_2.ipynb

2135 lines
156 KiB
Plaintext
Raw Permalink Normal View History

2024-06-10 01:09:57 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "z8cJbMghvK3k",
"outputId": "09520694-de64-4046-c2a6-639031aa1a10"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (2.32.3)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests) (2024.6.2)\n",
"Requirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (0.42.1)\n",
"Requirement already satisfied: pypinyin in /usr/local/lib/python3.10/dist-packages (0.51.0)\n",
"Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)\n",
"Looking in indexes: https://download.pytorch.org/whl/cu118\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.3.1+cu118)\n",
"Requirement already satisfied: torchtext in /usr/local/lib/python3.10/dist-packages (0.18.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.14.0)\n",
"Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.1)\n",
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12.1)\n",
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.3)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n",
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.8.89 in /usr/local/lib/python3.10/dist-packages (from torch) (11.8.89)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu11==11.8.89 in /usr/local/lib/python3.10/dist-packages (from torch) (11.8.89)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu11==11.8.87 in /usr/local/lib/python3.10/dist-packages (from torch) (11.8.87)\n",
"Requirement already satisfied: nvidia-cudnn-cu11==8.7.0.84 in /usr/local/lib/python3.10/dist-packages (from torch) (8.7.0.84)\n",
"Requirement already satisfied: nvidia-cublas-cu11==11.11.3.6 in /usr/local/lib/python3.10/dist-packages (from torch) (11.11.3.6)\n",
"Requirement already satisfied: nvidia-cufft-cu11==10.9.0.58 in /usr/local/lib/python3.10/dist-packages (from torch) (10.9.0.58)\n",
"Requirement already satisfied: nvidia-curand-cu11==10.3.0.86 in /usr/local/lib/python3.10/dist-packages (from torch) (10.3.0.86)\n",
"Requirement already satisfied: nvidia-cusolver-cu11==11.4.1.48 in /usr/local/lib/python3.10/dist-packages (from torch) (11.4.1.48)\n",
"Requirement already satisfied: nvidia-cusparse-cu11==11.7.5.86 in /usr/local/lib/python3.10/dist-packages (from torch) (11.7.5.86)\n",
"Requirement already satisfied: nvidia-nccl-cu11==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch) (2.20.5)\n",
"Requirement already satisfied: nvidia-nvtx-cu11==11.8.86 in /usr/local/lib/python3.10/dist-packages (from torch) (11.8.86)\n",
"Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch) (2.3.1)\n",
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from torchtext) (4.66.4)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchtext) (2.32.3)\n",
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchtext) (1.25.2)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (2024.6.2)\n",
"Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n",
"Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (5.2.0)\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.41.2)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.14.0)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.23.2)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.5.15)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n",
"Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.3)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.4)\n",
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (2023.6.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (4.12.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.6.2)\n",
"Requirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (7.7.1)\n",
"Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (5.5.6)\n",
"Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (0.2.0)\n",
"Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (5.7.1)\n",
"Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (3.6.6)\n",
"Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (7.34.0)\n",
"Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (3.0.11)\n",
"Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets) (8.6.2)\n",
"Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets) (6.3.3)\n",
"Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (67.7.2)\n",
"Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (0.19.1)\n",
"Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (4.4.2)\n",
"Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (0.7.5)\n",
"Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (3.0.45)\n",
"Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (2.16.1)\n",
"Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (0.2.0)\n",
"Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (0.1.7)\n",
"Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (4.9.0)\n",
"Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets) (6.5.5)\n",
"Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets) (0.8.4)\n",
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (3.1.4)\n",
"Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (24.0.1)\n",
"Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (23.1.0)\n",
"Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (5.7.2)\n",
"Collecting jupyter-client (from ipykernel>=4.5.1->ipywidgets)\n",
" Using cached jupyter_client-7.4.9-py3-none-any.whl (133 kB)\n",
"Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (5.10.4)\n",
"Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (6.5.4)\n",
"Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.6.0)\n",
"Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.8.3)\n",
"Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.18.1)\n",
"Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.20.0)\n",
"Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.1.0)\n",
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (0.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (2.8.2)\n",
"Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets) (0.7.0)\n",
"Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets) (0.2.13)\n",
"Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core>=4.6.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (4.2.2)\n",
"Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.2.4)\n",
"Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (4.9.4)\n",
"Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (4.12.3)\n",
"Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (6.1.0)\n",
"Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.7.1)\n",
"Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.3.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2.1.5)\n",
"Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.8.4)\n",
"Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.10.0)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (24.0)\n",
"Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.5.1)\n",
"Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.3.0)\n",
"Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2.19.1)\n",
"Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (4.19.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->jupyter-client->ipykernel>=4.5.1->ipywidgets) (1.16.0)\n",
"Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (21.2.0)\n",
"Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (23.2.0)\n",
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2023.12.1)\n",
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.35.1)\n",
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.18.1)\n",
"Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.24.0)\n",
"Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.16.0)\n",
"Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2.5)\n",
"Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.5.1)\n",
"Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2.22)\n",
"Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (3.7.1)\n",
"Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.8.0)\n",
"Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (3.7)\n",
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.3.1)\n",
"Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.2.1)\n",
"Installing collected packages: jupyter-client\n",
" Attempting uninstall: jupyter-client\n",
" Found existing installation: jupyter_client 8.6.2\n",
" Uninstalling jupyter_client-8.6.2:\n",
" Successfully uninstalled jupyter_client-8.6.2\n",
"Successfully installed jupyter-client-7.4.9\n",
"Requirement already satisfied: jupyter_core in /usr/local/lib/python3.10/dist-packages (5.7.2)\n",
"Requirement already satisfied: jupyter_client in /usr/local/lib/python3.10/dist-packages (7.4.9)\n",
"Collecting jupyter_client\n",
" Using cached jupyter_client-8.6.2-py3-none-any.whl (105 kB)\n",
"Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter_core) (4.2.2)\n",
"Requirement already satisfied: traitlets>=5.3 in /usr/local/lib/python3.10/dist-packages (from jupyter_core) (5.7.1)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from jupyter_client) (2.8.2)\n",
"Requirement already satisfied: pyzmq>=23.0 in /usr/local/lib/python3.10/dist-packages (from jupyter_client) (24.0.1)\n",
"Requirement already satisfied: tornado>=6.2 in /usr/local/lib/python3.10/dist-packages (from jupyter_client) (6.3.3)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->jupyter_client) (1.16.0)\n",
"Installing collected packages: jupyter_client\n",
" Attempting uninstall: jupyter_client\n",
" Found existing installation: jupyter_client 7.4.9\n",
" Uninstalling jupyter_client-7.4.9:\n",
" Successfully uninstalled jupyter_client-7.4.9\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"notebook 6.5.5 requires jupyter-client<8,>=5.3.4, but you have jupyter-client 8.6.2 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed jupyter_client-8.6.2\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.0.3)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
"Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"\u001b[31mERROR: Could not find a version that satisfies the requirement re (from versions: none)\u001b[0m\u001b[31m\n",
"\u001b[0m\u001b[31mERROR: No matching distribution found for re\u001b[0m\u001b[31m\n",
"\u001b[0mRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.2.2)\n",
"Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.25.2)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.10.1)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n",
"Requirement already satisfied: scipy==1.10.1 in /usr/local/lib/python3.10/dist-packages (1.10.1)\n",
"Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /usr/local/lib/python3.10/dist-packages (from scipy==1.10.1) (1.25.2)\n",
"Requirement already satisfied: gensim in /usr/local/lib/python3.10/dist-packages (4.3.2)\n",
"Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.10/dist-packages (from gensim) (1.25.2)\n",
"Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim) (1.10.1)\n",
"Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim) (6.4.0)\n"
]
}
],
"source": [
"%pip install --upgrade requests\n",
"%pip install jieba\n",
"%pip install pypinyin\n",
"%pip install -v torch torchtext --index-url https://download.pytorch.org/whl/cu118\n",
"%pip install chardet\n",
"%pip install transformers\n",
"%pip install ipywidgets\n",
"%pip install --upgrade jupyter_core jupyter_client\n",
"%pip install pandas\n",
"%pip install re\n",
"%pip install scikit-learn\n",
"%pip install scipy==1.10.1\n",
"%pip install gensim"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-fw8K0r8vK3m",
"outputId": "7c973fa0-05f5-42c6-cb7e-66b8d0500c6b"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Enabling notebook extension jupyter-js-widgets/extension...\n",
"Paths used for configuration of notebook: \n",
" \t/root/.jupyter/nbconfig/notebook.json\n",
"Paths used for configuration of notebook: \n",
" \t\n",
" - Validating: \u001b[32mOK\u001b[0m\n",
"Paths used for configuration of notebook: \n",
" \t/root/.jupyter/nbconfig/notebook.json\n"
]
}
],
"source": [
"!jupyter nbextension enable --py widgetsnbextension\n",
"import jieba\n",
"import pypinyin\n",
"import torch\n",
"from transformers import AutoTokenizer, AutoModel\n",
"import pandas\n",
"import re\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.datasets import load_iris\n",
"import numpy"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5EWDs2qIvK3n",
"outputId": "a0d96d62-9121-43de-9924-5fa9c67c0a6f"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"True\n"
]
}
],
"source": [
"print(torch.cuda.is_available())\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "V7yH6_p0xVM4",
"outputId": "89c42e07-65d0-4e8b-ecfa-8d0c0c2fa7cf"
},
"execution_count": 102,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "hAfKmY-8vK3o"
},
"source": [
"## Normalizacja wejścia - pozbycie się spacji i znaków innych niż chińskie (interpunkcyjnych).\n",
"### TODO - przepisać używając słownika znaków chińskich?"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {
"id": "70WYCTvLvK3p"
},
"outputs": [],
"source": [
"# lower() - male litery\n",
"# strip() - bez krancowych znakow niedrukowalnych\n",
"# bez znakow interpunkcyjnych\n",
"def normalizeString(s):\n",
" s = s.lower().strip()\n",
" s = re.sub(r\"([.!?])\", r\"\", s)\n",
" s = re.sub(r\"([,;:-])\", r\"\", s)\n",
" s = re.sub(r\"([。,?”“《》·、!:;π…ㄚ])\", r\"\", s)\n",
" s = re.sub(r\"([/])\", r\"\", s)\n",
" s = re.sub(r\"(['\\\"])\", r\" \", s)\n",
" return s.strip()\n",
"\n",
"def normalizeChinese(s):\n",
" s = normalizeString(s)\n",
" pom = \"\"\n",
" for c in s:\n",
" if c != \" \":\n",
" pom+=c\n",
" #pom+=\" \"\n",
" return pom.strip()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "AzN_U3XuvK3p"
},
"source": [
"## Wczytanie zbioru danych. https://www.kaggle.com/datasets/marquis03/chinese-couplets-dataset"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YMFveDsRvK3q",
"outputId": "0f9142d3-94bb-4bd1-d5aa-ad6c0d506a85"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"腾飞上铁锐意改革谋发展勇当千里马\n",
"和谐南供安全送电保畅通争做领头羊\n"
]
}
],
"source": [
"fixed_couplets_in = pandas.read_csv(\"fixed_couplets_in.txt\", sep=\"\\t\", names=[\"in\"], header=None)\n",
"fixed_couplets_out = pandas.read_csv(\"fixed_couplets_out.txt\", sep=\"\\t\", names=[\"out\"], header=None)\n",
"\n",
"normalized_fixed_couplets_in=[]\n",
"for _ in fixed_couplets_in[\"in\"]:\n",
" normalized_fixed_couplets_in.append(normalizeChinese(_))\n",
"normalized_fixed_couplets_out=[]\n",
"for _ in fixed_couplets_out[\"out\"]:\n",
" normalized_fixed_couplets_out.append(normalizeChinese(_))\n",
"\n",
"print(normalized_fixed_couplets_in[0])\n",
"print(normalized_fixed_couplets_out[0])"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "lRWgbshhvK3q",
"outputId": "633b6e24-3892-48e9-fde1-4e82b4d32e57"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" in out\n",
"0 腾飞上铁锐意改革谋发展勇当千里马 和谐南供安全送电保畅通争做领头羊\n",
"1 风弦未拨心先乱 夜幕已沉梦更闲\n",
"2 花梦粘于春袖口 莺声溅落柳枝头\n",
"3 晋世文章昌二陆 魏家词赋重三曹\n",
"4 一句相思吟岁月 千杯美酒醉风情\n",
"... ... ...\n",
"744910 半榻诗书盈陋室 一墙字画靓寒庐\n",
"744911 借角青山埋姓字 掬壶明月洗尘心\n",
"744912 苑内尽天姿锦窠仙髻无双艳 亭前多国色金粉紫檀第一香\n",
"744913 浩淼洞庭极目天为界 安闲钓叟静心孰羡鱼\n",
"744914 志踏云梯能揽月 坚磨铁棒可成针\n",
"\n",
"[744915 rows x 2 columns]\n"
]
}
],
"source": [
"fixed_couplets = pandas.DataFrame(\n",
" {\"in\": normalized_fixed_couplets_in,\n",
" \"out\": normalized_fixed_couplets_out\n",
" }\n",
" )\n",
"print(fixed_couplets)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BaOkCk66vK3r"
},
"source": [
"### Odrzucenie 95% danych - więcej niż 5% zajmuje całą pamięć i wywala program.\n",
"### Podział danych na zbiór treningowy i testowy."
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gZhvQ-9SvK3r",
"outputId": "516353b4-84df-4b7c-fcd5-4ff7642cf46e"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" in out\n",
"567354 宇高炎暑净 秋爽飒风来\n",
"118920 忧乐关天下 安危系一身\n",
"738591 一盏相思量寂寞 三分惆怅兑凄凉\n",
"509346 孝驻锦绣城喜吕梁歌飞春融三晋千秋画 义圆和谐梦看汾河景瑞水起九州万卷诗\n",
"75388 春临八桂海豚舞 福满九州彩凤飞\n",
"... ... ...\n",
"116492 创中华古老文明当同日月齐辉功垂万代 启黎庶鸿蒙草昧是与山河并寿德颂千秋\n",
"91658 纠缠海角指相思何时作罢 浪迹天涯心倦怠哪处归依\n",
"101376 特地显英灵化被逢人歌泽渥 配天昭厚德恩深无处不波恬\n",
"262048 温暖鹅城展翅奋飞中国梦 祥和蛇岁铺春欢庆小康年\n",
"415192 百业一支歌歌伴和风谐雨唱 九江千古梦梦同朗月艳阳圆\n",
"\n",
"[5959 rows x 2 columns]\n",
" in out\n",
"274864 林霭渐浓迷古寺 尘烟已远隐青山\n",
"222320 自古青天匡正义 而今华夏振雄风\n",
"100260 真心请客就该一五一五 假意为情何必我开我开\n",
"435928 爱本有心今不见 人如无欲意何求\n",
"446991 欲抹闲愁实不易 谁将片语问何求\n",
"... ... ...\n",
"213030 万象随缘观自在 一心发愿待君归\n",
"299155 春联妙句动心魄 小院雅风入彩光\n",
"643294 梅亭吹雪横霜笛 松麓邀云放月筝\n",
"628861 红似桃花白似雪 绿如李叶亮如霜\n",
"566605 数字双音分两用 联文对句限孤平\n",
"\n",
"[1490 rows x 2 columns]\n"
]
}
],
"source": [
"male, duze = train_test_split(fixed_couplets,test_size=0.99,random_state=42)\n",
"treningowe, testowe = train_test_split(male,test_size=0.2,random_state=42)\n",
"print(treningowe)\n",
"print(testowe)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "2Ek1LXPjvK3r"
},
"source": [
"### Przywrócenie numeracji od 0."
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Pe3iTklRvK3r",
"outputId": "d8378e46-4634-49f0-f6c7-cc415cb2a032"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" in out\n",
"0 宇高炎暑净 秋爽飒风来\n",
"1 忧乐关天下 安危系一身\n",
"2 一盏相思量寂寞 三分惆怅兑凄凉\n",
"3 孝驻锦绣城喜吕梁歌飞春融三晋千秋画 义圆和谐梦看汾河景瑞水起九州万卷诗\n",
"4 春临八桂海豚舞 福满九州彩凤飞\n",
"... ... ...\n",
"5954 创中华古老文明当同日月齐辉功垂万代 启黎庶鸿蒙草昧是与山河并寿德颂千秋\n",
"5955 纠缠海角指相思何时作罢 浪迹天涯心倦怠哪处归依\n",
"5956 特地显英灵化被逢人歌泽渥 配天昭厚德恩深无处不波恬\n",
"5957 温暖鹅城展翅奋飞中国梦 祥和蛇岁铺春欢庆小康年\n",
"5958 百业一支歌歌伴和风谐雨唱 九江千古梦梦同朗月艳阳圆\n",
"\n",
"[5959 rows x 2 columns]\n",
" in out\n",
"0 林霭渐浓迷古寺 尘烟已远隐青山\n",
"1 自古青天匡正义 而今华夏振雄风\n",
"2 真心请客就该一五一五 假意为情何必我开我开\n",
"3 爱本有心今不见 人如无欲意何求\n",
"4 欲抹闲愁实不易 谁将片语问何求\n",
"... ... ...\n",
"1485 万象随缘观自在 一心发愿待君归\n",
"1486 春联妙句动心魄 小院雅风入彩光\n",
"1487 梅亭吹雪横霜笛 松麓邀云放月筝\n",
"1488 红似桃花白似雪 绿如李叶亮如霜\n",
"1489 数字双音分两用 联文对句限孤平\n",
"\n",
"[1490 rows x 2 columns]\n"
]
}
],
"source": [
"treningowe = treningowe.reset_index(drop=True)\n",
"testowe = testowe.reset_index(drop=True)\n",
"print(treningowe)\n",
"print(testowe)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1_w2PXrGvK3s"
},
"source": [
"### Pakiet *pypinyin* przewiduje wymowę pinyin dobrze bez potrzeby używania pakietu *jieba*."
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5dnmD3wevK3s",
"outputId": "7b3a7727-f0f5-4657-e3b1-e4760c81144d"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"春临八桂海豚舞\n",
"[['chun1'], ['lin2'], ['ba1'], ['gui4'], ['hai3'], ['tun2'], ['wu3']]\n",
"['chun1', 'lin2', 'ba1', 'gui4', 'hai3', 'tun2', 'wu3']\n",
"['春临', '八桂', '海豚', '舞']\n",
"[['chun1'], ['lin2'], ['ba1'], ['gui4'], ['hai3'], ['tun2'], ['wu3']]\n",
"['chun1', 'lin2', 'ba1', 'gui4', 'hai3', 'tun2', 'wu3']\n"
]
}
],
"source": [
"from pypinyin import pinyin, lazy_pinyin, Style\n",
"\n",
"zdanie = treningowe[\"in\"][4]\n",
"print(zdanie)\n",
"print(pinyin(zdanie, style=Style.TONE3, neutral_tone_with_five=True))\n",
"print(lazy_pinyin(zdanie, style=Style.TONE3, neutral_tone_with_five=True))\n",
"\n",
"slowa = list(jieba.cut(zdanie))\n",
"print(slowa)\n",
"print(pinyin(slowa, style=Style.TONE3, neutral_tone_with_five=True))\n",
"print(lazy_pinyin(slowa, style=Style.TONE3, neutral_tone_with_five=True))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "LghjKuLGvK3s"
},
"source": [
"## Podział wymowy pinyin na początki (initials), końcówki (finals) i tony.\n",
"### Zamina w liczby przy pomocy słownika."
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"id": "xj0SAPGsvK3s"
},
"outputs": [],
"source": [
"from pypinyin.contrib.tone_convert import to_finals, to_initials\n",
"# 声母表\n",
"_INITIALS=['b','p','m','f','d','t','n','l','g','k','h','j','q','x','zh','ch','sh','r','z','c','s',]\n",
"# 声母表,把 y,w 也当作声母\n",
"_INITIALS_NOT_STRICT=_INITIALS+['y','w']\n",
"# 韵母表\n",
"_FINALS=['i','u','ü','a','ia','ua','o','uo','e','ie','üe','ai','uai','ei','uei','ao','iao','ou','iou','an','ian','uan','üan','en','in','uen','ün','ang','iang','uang','eng','ing','ueng','ong','iong','er','ê',]\n",
"\n",
"slownik_initials = {}\n",
"licznik = 1\n",
"for indeks_wersu_pierwszego in _INITIALS+[\"\"]:\n",
" slownik_initials[indeks_wersu_pierwszego] = licznik\n",
" licznik+=1\n",
"\n",
"slownik_finals = {}\n",
"licznik = 1\n",
"for indeks_wersu_pierwszego in _FINALS+[\"\"]:\n",
" slownik_finals[indeks_wersu_pierwszego] = licznik\n",
" licznik+=1\n",
"\n",
"def poczatek_koniec_ton(zapis_pinyin_3):\n",
" poczatek = slownik_initials[to_initials(zapis_pinyin_3)]\n",
" koniec = slownik_finals[to_finals(zapis_pinyin_3).replace('v', 'ü')]\n",
" ton = int(zapis_pinyin_3[-1])\n",
" return poczatek, koniec, ton\n",
"\n",
"def wymowy_i_tony_zdania(zdanie):\n",
" zapis_pinyin_3_zdania = lazy_pinyin(zdanie, style=Style.TONE3, neutral_tone_with_five=True)\n",
" poczatki = []\n",
" konce =[]\n",
" tony = []\n",
" # print(zdanie, zapis_pinyin_3_zdania)\n",
" for zp3 in zapis_pinyin_3_zdania:\n",
" p,k,t = poczatek_koniec_ton(zp3)\n",
" poczatki.append(p)\n",
" konce.append(k)\n",
" tony.append(t)\n",
" return poczatki, konce, tony\n",
"\n",
"def dopasuj_dlugosc_wektora(wektor, dlugosc_wektora):\n",
" if len(wektor)>dlugosc_wektora:\n",
" wynik = wektor[:dlugosc_wektora]\n",
" else:\n",
" wynik = numpy.pad(wektor,(0,dlugosc_wektora-len(wektor)), mode='constant', constant_values=0)\n",
" return wynik"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {
"id": "s4NN04ZpvK3t"
},
"outputs": [],
"source": [
"def poczatki_konce_tony_dla_zdan(zdania, liczba_wejscia):\n",
" poczatki_wyn = []\n",
" konce_wyn = []\n",
" tony_wyn = []\n",
"\n",
" for zdanie in zdania:\n",
" poczatki, konce, tony = wymowy_i_tony_zdania(zdanie)\n",
"\n",
" poczatki = dopasuj_dlugosc_wektora(poczatki, liczba_wejscia)\n",
" konce = dopasuj_dlugosc_wektora(konce, liczba_wejscia)\n",
" tony = dopasuj_dlugosc_wektora(tony, liczba_wejscia)\n",
"\n",
" poczatki_wyn.append(poczatki)\n",
" konce_wyn.append(konce)\n",
" tony_wyn.append(tony)\n",
"\n",
" return poczatki_wyn, konce_wyn, tony_wyn"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "fyj5LRN3vK3t"
},
"source": [
"### Początki, końcówki i tony wierszy treningowych."
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vvooWU0hvK3t",
"outputId": "e08da1e6-3955-4e7b-fcda-a4a75231fec9"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"5959\n",
"宇高炎暑净\n",
"['yu3', 'gao1', 'yan2', 'shu3', 'jing4']\n",
"秋爽飒风来\n",
"['qiu1', 'shuang3', 'sa4', 'feng1', 'lai2']\n",
"5959\n",
"[22 9 22 17 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0]\n",
"[ 3 16 21 2 32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0]\n",
"[3 1 2 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n",
"[13 17 21 4 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0]\n",
"[19 30 4 31 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0]\n",
"[1 3 4 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
]
}
],
"source": [
"print(len(treningowe[\"in\"]))\n",
"print(treningowe[\"in\"][0])\n",
"print(lazy_pinyin(treningowe[\"in\"][0], style=Style.TONE3, neutral_tone_with_five=True))\n",
"print(treningowe[\"out\"][0])\n",
"print(lazy_pinyin(treningowe[\"out\"][0], style=Style.TONE3, neutral_tone_with_five=True))\n",
"\n",
"liczba_wejscia = 35\n",
"\n",
"poczatki_treningowe_in, konce_treningowe_in, tony_treningowe_in = poczatki_konce_tony_dla_zdan(treningowe[\"in\"], liczba_wejscia)\n",
"poczatki_treningowe_out, konce_treningowe_out, tony_treningowe_out = poczatki_konce_tony_dla_zdan(treningowe[\"out\"], liczba_wejscia)\n",
"\n",
"print(len(poczatki_treningowe_in))\n",
"print(poczatki_treningowe_in[0])\n",
"print(konce_treningowe_in[0])\n",
"print(tony_treningowe_in[0])\n",
"print(poczatki_treningowe_out[0])\n",
"print(konce_treningowe_out[0])\n",
"print(tony_treningowe_out[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ZQd6eh5bvK3u"
},
"source": [
"## Zanurzenia BAAI wierszy treningowych. https://huggingface.co/BAAI/bge-small-zh-v1.5"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"id": "61vGtuyBvK3u"
},
"outputs": [],
"source": [
"# # Load model from HuggingFace Hub\n",
"# tokenizer = AutoTokenizer.from_pretrained(\"BAAI/bge-small-zh-v1.5\")\n",
"# model = AutoModel.from_pretrained(\"BAAI/bge-small-zh-v1.5\")\n",
"# model.eval()\n",
"\n",
"# def zanurzenia_zdan(lista_zdan):\n",
"# # Sentences we want sentence embeddings for\n",
"# #sentences = [\"样例数据-1样例数据\", \"样例数据-2样例数据\"]\n",
"# sentences = lista_zdan\n",
"\n",
"# # Tokenize sentences\n",
"# encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')\n",
"# # for s2p(short query to long passage) retrieval task, add an instruction to query (not add instruction for passages)\n",
"# # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')\n",
"\n",
"# # Compute token embeddings\n",
"# with torch.no_grad():\n",
"# model_output = model(**encoded_input)\n",
"# # Perform pooling. In this case, cls pooling.\n",
"# sentence_embeddings = model_output[0][:, 0]\n",
"# # normalize embeddings\n",
"# sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)\n",
"# # print(\"Sentence embeddings shape:\", sentence_embeddings.shape)\n",
"# # print(\"Sentence embeddings:\", sentence_embeddings)\n",
"\n",
"# return sentence_embeddings\n",
"\n",
"# def zanurzenie_zdania(zdanie):\n",
"# # Tokenize sentences\n",
"# encoded_input = tokenizer(zdanie, padding=True, truncation=True, return_tensors='pt')\n",
"# # for s2p(short query to long passage) retrieval task, add an instruction to query (not add instruction for passages)\n",
"# # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')\n",
"\n",
"# # Compute token embeddings\n",
"# with torch.no_grad():\n",
"# model_output = model(**encoded_input)\n",
"# # Perform pooling. In this case, cls pooling.\n",
"# sentence_embedding = model_output[0][:, 0]\n",
"# # normalize embeddings\n",
"# sentence_embedding = torch.nn.functional.normalize(sentence_embedding, p=2, dim=1)\n",
"\n",
"# return sentence_embedding"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {
"id": "4gt7oqi4vK3u"
},
"outputs": [],
"source": [
"# treningowe_in_lista = treningowe[\"in\"].tolist()\n",
"# treningowe_out_lista = treningowe[\"out\"].tolist()\n",
"\n",
"# print(len(treningowe_in_lista))\n",
"# print(treningowe_in_lista[0])\n",
"# print(treningowe_out_lista[0])\n",
"\n",
"# zanurzenia_treningowe_in = zanurzenia_zdan(treningowe_in_lista)\n",
"# zanurzenia_treningowe_out = zanurzenia_zdan(treningowe_out_lista)\n",
"\n",
"# print(zanurzenia_treningowe_in.shape)\n",
"# print(zanurzenia_treningowe_in[0])\n",
"# print(zanurzenia_treningowe_out[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "TAfBMDqlvK3u"
},
"source": [
"### Tensory - reprezentacje pierwszych wersów wierszy treningowych."
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "hV9h84mrvK3v",
"outputId": "0fbd174d-e6f3-4a08-892e-0de2ac838f99"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"5959\n",
"torch.Size([109])\n",
"tensor([22, 9, 22, 17, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,\n",
" 16, 21, 2, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1,\n",
" 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 12, 32,\n",
" 4])\n"
]
}
],
"source": [
"x = []\n",
"for indeks_wersu_pierwszego in range(len(poczatki_treningowe_in)):\n",
" poczatki = poczatki_treningowe_in\n",
" konce = konce_treningowe_in\n",
" tony = tony_treningowe_in\n",
" niezerowe_poczatki = [p for p in poczatki[indeks_wersu_pierwszego] if p>0.0]\n",
" poczatek_ostatniego_znaku = niezerowe_poczatki[len(niezerowe_poczatki)-1]\n",
" koniec_ostatniego_znaku = konce[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
" ton_ostatniego_znaku = tony[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
" x.append(torch.cat(\n",
" (\n",
" # zanurzenia_treningowe_in[indeks_wersu_pierwszego],\n",
" torch.from_numpy(poczatki_treningowe_in[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(konce_treningowe_in[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(tony_treningowe_in[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(numpy.array([len(niezerowe_poczatki)])),\n",
" torch.from_numpy(numpy.array([poczatek_ostatniego_znaku])),\n",
" torch.from_numpy(numpy.array([koniec_ostatniego_znaku])),\n",
" torch.from_numpy(numpy.array([ton_ostatniego_znaku]))\n",
" )\n",
" ))\n",
"print(len(x))\n",
"print(x[0].shape)\n",
"print(x[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wkLMwhjevK3v"
},
"source": [
"### Tensory - reprezentacje drugich wersów wierszy treningowych."
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PqKvzCJavK3v",
"outputId": "dbfad29e-8312-49ea-a2fe-891604f8092a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"5959\n",
"torch.Size([109])\n",
"tensor([13, 17, 21, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19,\n",
" 30, 4, 31, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3,\n",
" 4, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8, 12,\n",
" 2])\n"
]
}
],
"source": [
"y = []\n",
"for indeks_wersu_pierwszego in range(len(poczatki_treningowe_out)):\n",
" poczatki = poczatki_treningowe_out\n",
" konce = konce_treningowe_out\n",
" tony = tony_treningowe_out\n",
" niezerowe_poczatki = [p for p in poczatki[indeks_wersu_pierwszego] if p>0.0]\n",
" poczatek_ostatniego_znaku = niezerowe_poczatki[len(niezerowe_poczatki)-1]\n",
" koniec_ostatniego_znaku = konce[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
" ton_ostatniego_znaku = tony[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
" y.append(\n",
" torch.cat(\n",
" (\n",
" # zanurzenia_treningowe_out[indeks_wersu_pierwszego],\n",
" torch.from_numpy(poczatki_treningowe_out[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(konce_treningowe_out[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(tony_treningowe_out[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(numpy.array([len(niezerowe_poczatki)])),\n",
" torch.from_numpy(numpy.array([poczatek_ostatniego_znaku])),\n",
" torch.from_numpy(numpy.array([koniec_ostatniego_znaku])),\n",
" torch.from_numpy(numpy.array([ton_ostatniego_znaku]))\n",
" )\n",
" )\n",
" )\n",
"print(len(y))\n",
"print(y[0].shape)\n",
"print(y[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "-8hQv7y0vK3w"
},
"source": [
"## Wejście do sieci neuronowej.\n",
"### Odpowiadające sobie wersy i kilka losowo dobranych nieodpowiadających sobie wersów."
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "G_zK4KIRvK3w",
"outputId": "7966a2bc-b13e-4ac1-a0d4-ae6b7a51c181"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"29795\n",
"tensor([22, 9, 22, 17, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,\n",
" 16, 21, 2, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1,\n",
" 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 12, 32,\n",
" 4, 13, 17, 21, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 19, 30, 4, 31, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n",
" 3, 4, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8,\n",
" 12, 2])\n",
"29795\n",
"1\n"
]
}
],
"source": [
"from random import sample\n",
"X = []\n",
"Y = []\n",
"for indeks_wersu_drugiego in range(len(x)):\n",
" indeksy = sample(range(len(y)), 5)\n",
" if indeks_wersu_drugiego not in indeksy:\n",
" indeksy[0] = indeks_wersu_drugiego\n",
" for k in indeksy:\n",
" X.append(\n",
" torch.cat(\n",
" (x[indeks_wersu_drugiego], y[k])\n",
" )\n",
" )\n",
" if indeks_wersu_drugiego==k:\n",
" Y.append(1)\n",
" else:\n",
" Y.append(0)\n",
"\n",
"print(len(X))\n",
"print(X[0])\n",
"print(len(Y))\n",
"print(Y[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4i_4--zBvK3w"
},
"source": [
"## Modele sklearn."
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 74
},
"id": "WuDfk70LvK3w",
"outputId": "35065e9c-8c6b-4837-8018-49cc504f0d90"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"MLPClassifier()"
],
"text/html": [
"<style>#sk-container-id-7 {color: black;background-color: white;}#sk-container-id-7 pre{padding: 0;}#sk-container-id-7 div.sk-toggleable {background-color: white;}#sk-container-id-7 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-7 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-7 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-7 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-7 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-7 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-7 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-7 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-7 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-7 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-7 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-7 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-7 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-7 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-7 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-7 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-7 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-7 div.sk-item {position: relative;z-index: 1;}#sk-container-id-7 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-7 div.sk-item::before, #sk-container-id-7 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-7 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-7 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-7 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-7 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-7 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-7 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-7 div.sk-label-container {text-align: center;}#sk-container-id-7 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-7 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-7\" class=\"sk-top-con
]
},
"metadata": {},
"execution_count": 117
}
],
"source": [
"from sklearn.neural_network import MLPClassifier\n",
"klasyfikator = MLPClassifier() # activation=\"tanh\"\n",
"\n",
"klasyfikator.fit(X, Y)"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 74
},
"id": "VpAcpuprvK3x",
"outputId": "8306fb43-7d24-4b52-f1bd-e0373da43f42"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"MLPRegressor()"
],
"text/html": [
"<style>#sk-container-id-8 {color: black;background-color: white;}#sk-container-id-8 pre{padding: 0;}#sk-container-id-8 div.sk-toggleable {background-color: white;}#sk-container-id-8 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-8 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-8 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-8 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-8 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-8 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-8 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-8 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-8 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-8 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-8 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-8 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-8 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-8 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-8 div.sk-item {position: relative;z-index: 1;}#sk-container-id-8 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-8 div.sk-item::before, #sk-container-id-8 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-8 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-8 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-8 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-8 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-8 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-8 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-8 div.sk-label-container {text-align: center;}#sk-container-id-8 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-8 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-8\" class=\"sk-top-con
]
},
"metadata": {},
"execution_count": 118
}
],
"source": [
"from sklearn.neural_network import MLPRegressor\n",
"regresor = MLPRegressor() # activation=\"tanh\"\n",
"\n",
"regresor.fit(X, Y)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Q1RYkn4YvK3x"
},
"source": [
"### Początki, końcówki i tony wierszy testowych."
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "HUtENh9bvK3x",
"outputId": "1cd28e90-bf60-42b5-86a6-9f94548a40e7"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1490\n",
"林霭渐浓迷古寺\n",
"尘烟已远隐青山\n",
"1490\n",
"[ 8 22 12 7 3 9 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0]\n",
"[25 12 21 34 1 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0]\n",
"[2 3 4 2 2 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n",
"[16 22 22 22 22 13 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0]\n",
"[24 21 1 23 25 32 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
" 0 0 0 0 0 0 0 0 0 0 0]\n",
"[2 1 3 3 3 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
]
}
],
"source": [
"print(len(testowe[\"in\"]))\n",
"print(testowe[\"in\"][0])\n",
"print(testowe[\"out\"][0])\n",
"\n",
"liczba_wejscia = 35\n",
"\n",
"poczatki_testowe_in, konce_testowe_in, tony_testowe_in = poczatki_konce_tony_dla_zdan(testowe[\"in\"], liczba_wejscia)\n",
"poczatki_testowe_out, konce_testowe_out, tony_testowe_out = poczatki_konce_tony_dla_zdan(testowe[\"out\"], liczba_wejscia)\n",
"\n",
"print(len(poczatki_testowe_in))\n",
"print(poczatki_testowe_in[0])\n",
"print(konce_testowe_in[0])\n",
"print(tony_testowe_in[0])\n",
"print(poczatki_testowe_out[0])\n",
"print(konce_testowe_out[0])\n",
"print(tony_testowe_out[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "JVmApwk1vK3x"
},
"source": [
"### Zanurzenia BAAI wierszy testowych."
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"id": "f0564iMSvK3x"
},
"outputs": [],
"source": [
"# testowe_in_lista = testowe[\"in\"].tolist()\n",
"# testowe_out_lista = testowe[\"out\"].tolist()\n",
"\n",
"# print(len(testowe_in_lista))\n",
"# print(testowe_in_lista[0])\n",
"# print(testowe_out_lista[0])\n",
"\n",
"# zanurzenia_testowe_in = zanurzenia_zdan(testowe_in_lista)\n",
"# zanurzenia_testowe_out = zanurzenia_zdan(testowe_out_lista)\n",
"\n",
"# print(zanurzenia_testowe_in.shape)\n",
"# print(zanurzenia_testowe_in[0])\n",
"# print(zanurzenia_testowe_out[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4G8ZrHPJvK3y"
},
"source": [
"### Tensory - reprezentacje pierwszych wersów wierszy testowych."
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "oy4-_mVxvK3y",
"outputId": "132585d7-5fb1-42b1-8127-e31febec14c8"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1490\n",
"torch.Size([109])\n",
"tensor([ 8, 22, 12, 7, 3, 9, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25,\n",
" 12, 21, 34, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,\n",
" 4, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 21, 1,\n",
" 4])\n"
]
}
],
"source": [
"x_test = []\n",
"for indeks_wersu_pierwszego in range(len(poczatki_testowe_in)):\n",
" poczatki = poczatki_testowe_in\n",
" konce = konce_testowe_in\n",
" tony = tony_testowe_in\n",
" niezerowe_poczatki = [p for p in poczatki[indeks_wersu_pierwszego] if p>0.0]\n",
" poczatek_ostatniego_znaku = niezerowe_poczatki[len(niezerowe_poczatki)-1]\n",
" koniec_ostatniego_znaku = konce[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
" ton_ostatniego_znaku = tony[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
" x_test.append(torch.cat(\n",
" (\n",
" #zanurzenia_testowe_in[indeks_wersu_pierwszego],\n",
" torch.from_numpy(poczatki_testowe_in[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(konce_testowe_in[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(tony_testowe_in[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(numpy.array([len(niezerowe_poczatki)])),\n",
" torch.from_numpy(numpy.array([poczatek_ostatniego_znaku])),\n",
" torch.from_numpy(numpy.array([koniec_ostatniego_znaku])),\n",
" torch.from_numpy(numpy.array([ton_ostatniego_znaku]))\n",
" )\n",
" ))\n",
"print(len(x_test))\n",
"print(x_test[0].shape)\n",
"print(x_test[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "kKf0Ob0CvK3y"
},
"source": [
"### Tensory - reprezentacje drugich wersów wierszy testowych."
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "khi6U9dlvK3y",
"outputId": "3b1a4673-1af2-4832-8138-fa7d99139f76"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1490\n",
"torch.Size([109])\n",
"tensor([16, 22, 22, 22, 22, 13, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24,\n",
" 21, 1, 23, 25, 32, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1,\n",
" 3, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 17, 20,\n",
" 1])\n"
]
}
],
"source": [
"y_test = []\n",
"for indeks_wersu_pierwszego in range(len(poczatki_testowe_out)):\n",
" poczatki = poczatki_testowe_out\n",
" konce = konce_testowe_out\n",
" tony = tony_testowe_out\n",
" niezerowe_poczatki = [p for p in poczatki[indeks_wersu_pierwszego] if p>0.0]\n",
" poczatek_ostatniego_znaku = niezerowe_poczatki[len(niezerowe_poczatki)-1]\n",
" koniec_ostatniego_znaku = konce[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
" ton_ostatniego_znaku = tony[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
" y_test.append(\n",
" torch.cat(\n",
" (\n",
" # zanurzenia_testowe_out[indeks_wersu_pierwszego],\n",
" torch.from_numpy(poczatki_testowe_out[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(konce_testowe_out[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(tony_testowe_out[indeks_wersu_pierwszego]),\n",
" torch.from_numpy(numpy.array([len(niezerowe_poczatki)])),\n",
" torch.from_numpy(numpy.array([poczatek_ostatniego_znaku])),\n",
" torch.from_numpy(numpy.array([koniec_ostatniego_znaku])),\n",
" torch.from_numpy(numpy.array([ton_ostatniego_znaku]))\n",
" )\n",
" )\n",
" )\n",
"print(len(y_test))\n",
"print(y_test[0].shape)\n",
"print(y_test[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6xAhS7JFvK3-"
},
"source": [
"## Wejście do sieci neuronowej."
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "9tjiqjaMvK3_",
"outputId": "86050073-948d-4f3e-d0b3-bd4bf912f89e"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"7450\n",
"tensor([ 8, 22, 12, 7, 3, 9, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25,\n",
" 12, 21, 34, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,\n",
" 4, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 21, 1,\n",
" 4, 16, 22, 22, 22, 22, 13, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 24, 21, 1, 23, 25, 32, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,\n",
" 1, 3, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 17,\n",
" 20, 1])\n",
"7450\n",
"1\n"
]
}
],
"source": [
"X_test = []\n",
"Y_test = []\n",
"for indeks_wersu_drugiego in range(len(x_test)):\n",
" indeksy = sample(range(len(y_test)), 5)\n",
" if indeks_wersu_drugiego not in indeksy:\n",
" indeksy[0] = indeks_wersu_drugiego\n",
" for k in indeksy:\n",
" X_test.append(\n",
" torch.cat(\n",
" (x_test[indeks_wersu_drugiego], y_test[k])\n",
" )\n",
" )\n",
" if indeks_wersu_drugiego==k:\n",
" Y_test.append(1)\n",
" else:\n",
" Y_test.append(0)\n",
"\n",
"print(len(X_test))\n",
"print(X_test[0])\n",
"print(len(Y_test))\n",
"print(Y_test[0])"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "vRLr2jePvK3_"
},
"source": [
"## Przewidywania sieci neuronowych."
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {
"id": "aV0NMXntvK3_"
},
"outputs": [],
"source": [
"przewidywania_klasyfikatora = klasyfikator.predict(X_test)"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8xAk6jYsvK3_",
"outputId": "18748f71-ff77-44fc-f33d-1ebba630d4e7"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"-1.5850154355095964 1.513799312237276 0.20544654697588927 0.17125274897300372\n"
]
}
],
"source": [
"przewidywania_regresora = regresor.predict(X_test)\n",
"print(numpy.min(przewidywania_regresora),numpy.max(przewidywania_regresora),numpy.mean(przewidywania_regresora),numpy.median(przewidywania_regresora))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Kp830QiMvK4A"
},
"source": [
"### Dokładność na przygotowanych danych testowych."
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jYKOPOQcvK4A",
"outputId": "4daa9be0-8ed0-482c-c49e-27bb2f7fa5b9"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.8555704697986577\n"
]
}
],
"source": [
"### MLPClassifier\n",
"\n",
"# Accuracy: fraction of test pairs whose predicted label equals the gold label.\n",
"licznik = 0\n",
"mianownik = len(przewidywania_klasyfikatora)\n",
"for indeks_wersu_pierwszego, przewidywanie in enumerate(przewidywania_klasyfikatora):\n",
"    if przewidywanie == Y_test[indeks_wersu_pierwszego]:\n",
"        licznik += 1\n",
"\n",
"print(licznik / mianownik * 1.0)"
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BS6KtYRQvK4A",
"outputId": "0b50b72f-96d9-4482-d80e-8d68c57f329a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.8377181208053691\n",
"0.8087248322147651\n"
]
}
],
"source": [
"### MLPRegressor\n",
"\n",
"def dokladnosc_przy_progu(przewidywania, etykiety, prog):\n",
"    \"\"\"Accuracy of thresholded regressor scores.\n",
"\n",
"    A pair counts as correct when its gold label is 1 and the score is\n",
"    strictly above `prog`, or the label is 0 and the score is strictly\n",
"    below `prog`.  A score exactly equal to `prog` is counted as wrong,\n",
"    matching the original per-threshold loops this helper replaces.\n",
"    \"\"\"\n",
"    licznik = 0\n",
"    mianownik = 0\n",
"    for i in range(len(przewidywania)):\n",
"        mianownik += 1\n",
"        if etykiety[i] == 1 and przewidywania[i] > prog:\n",
"            licznik += 1\n",
"        elif etykiety[i] == 0 and przewidywania[i] < prog:\n",
"            licznik += 1\n",
"    return licznik / mianownik * 1.0\n",
"\n",
"# Match counted above 0.5\n",
"print(dokladnosc_przy_progu(przewidywania_regresora, Y_test, 0.5))\n",
"\n",
"# Match counted above 0.9\n",
"print(dokladnosc_przy_progu(przewidywania_regresora, Y_test, 0.9))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jQ4yFJcAvK4A"
},
"source": [
"## Metryka oceniająca proponowanie przez model drugiego wersu.\n",
"### Jeżeli wśród propozycji nie ma spodziewanego poprawnego wersu, metryka przyjmuje minimalną wartość 0,0.\n",
"### Im mniej błędnych propozycji, tym wyższy wynik metryki.\n",
"### Jeżeli model proponuje tylko jeden wers i jest on poprawny, metryka przyjmuje maksymalną wartość 1,0."
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {
"id": "5yJbzXdnvK4B"
},
"outputs": [],
"source": [
"def jagosz_score(spodziewany_wers,proponowane_wersy):\n",
"    \"\"\"Score one proposal set against the expected second verse.\n",
"\n",
"    Returns 0.0 when the expected verse is absent (including when nothing\n",
"    was proposed) and 1/len(proponowane_wersy) when it is present, so a\n",
"    single correct proposal scores the maximum of 1.0.\n",
"    \"\"\"\n",
"    trafienie = 1 if spodziewany_wers in proponowane_wersy else 0\n",
"    # max(..., 1) guards the division when the proposal list is empty\n",
"    return trafienie / max(len(proponowane_wersy), 1) * 1.0"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "5W-lowtuvK4B"
},
"source": [
"### Wersja metryki dla całego zbioru."
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {
"id": "vm6Np4RxvK4B"
},
"outputs": [],
"source": [
"def jagosz_score_dla_zbioru(krotki):\n",
"    \"\"\"Corpus-level variant of jagosz_score.\n",
"\n",
"    `krotki` holds (expected_verse, proposed_verses) tuples.  Returns the\n",
"    number of hits divided by the total number of proposals, or 0 when\n",
"    nothing was proposed at all.\n",
"    \"\"\"\n",
"    licznik = 0\n",
"    mianownik = 0\n",
"\n",
"    for spodziewany_wers, proponowane_wersy in krotki:\n",
"        if spodziewany_wers in proponowane_wersy:\n",
"            licznik += 1\n",
"        mianownik += len(proponowane_wersy)\n",
"\n",
"    return 0 if mianownik == 0 else licznik / mianownik * 1.0"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {
"id": "mFhtdrE6vK4B"
},
"outputs": [],
"source": [
"wybrane_dane_testowe = sample(range(len(x_test)),10)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "OQ-1vBedvK4C"
},
"source": [
"## MLPClassifier\n",
"### Proponuje wszystkie wersy, dla których ocena modelu to 1."
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_QX8BxFOvK4C",
"outputId": "5b216c48-ecf2-414a-b0e4-ce520691aba7"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"wers pierwszy:\t\t 仁义自修君子安我\n",
"poprawny wers drugi:\t 诗礼之教家人利贞\n",
"\n",
"proponowane drugie wersy: ['丰碑万古纪殊勋', '情牵大地春满人间', '珠圆玉润入口皆甜', '德行梦笔开盛世新篇', '柔水月光披野地天穹', '闲耽笔墨自从容', '一湾碧水日如金', '浮沉历尽许由谁', '人情世故亦须明', '三番顾茅庐皇叔牵龙', '春光入户户新幸福多', '花明柳媚湖上长春', '调一湖春色染绿江淮', '发者斗芳梅葩早帅焉', '一联争首榜元眼花胪', '深恩彻骨万代常萦', '堤前柳浪露春光', '湖山叠韵入我诗囊', '辞雄子建赋拟相如', '行廉拒腐执法如山', '国运弥盛史弥远', '瑞通阆苑琼楼兴百轩', '苗兴汉夏望族振乾坤', '砖雕雕壁画砖马腾空', '歌酣万户九域报长春', '千幅对联红透时光', '赋浓夏盛寓秋实', '东床配西席不是东西', '飞鸿远浦一时惊', '就门外汉示不二门', '天心阁阁内鸽鸽飞阁不飞', '捉刀李白斩斯文', '桨声翻学海海载苦舟', '月光如户窥佳人', '小康致富富人间', '直播日照时时精彩', '共赏芦溪水高下相倾', '喜传桃谷峪马叫人欢', '满腔忧愤铸诗魂', '荷描夏画日钤章', '小桃几树鸟啼红', '五光十色文字之华', '柳垂水面翠溶南北风', '伟雄心志白鹤相知', '小金龙瑞雪兆丰年', '松风竹韵多抒情', '当辨忠奸岂可负全民', '心中无欲不争春', '樽彝错杂古道犹存', '诗礼之教家人利贞', '出门去白面书生', '中古生华易古往冬', '攀龙附凤欲攀彩凤缘', '地连南北日星恒久晖', '吸烟无益肺摧残', '蔼峰亦寄诗仙情', '杯中寂寞不曾空', '油田崛起为生产护航', '综一代典成一家言', '辩雕春囿德莹秋天', '碧峰犹冷寺前春', '微言明义苦谏纠偏', '尘凡皆妄昧贪嗔痴愚', '草逢蓬室至家中', '平野百里高山九重', '诗篇避俗不酬人', '官吏非全力吏全力官', '利人始是大修行', '鸣钟食鼎甘田土之出', '手携一集质于通人', '方塘九夏溢荷香', '扬风遗泽仁厚人家', '风中落叶泣无声', '雅情雅韵仙客有约', '开枰先弃是非心', '陈天保颂代地道终', '飞腾雅典腾飞环球', '天明独倚楼坐到黄昏', '塞上无诗诗圣上乘诗', '不甘卖命换虚名', '花样年华联若洒可钦', '寄人篱下始知求', '误将弟子入迷宫', '悲秋远去一孤鸿', '盛世兴盛事鹏举云天']\n",
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
"liczba proponowanych wersów: 85\n",
"\n",
"wynik przyjętej metryki: 0.011764705882352941\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 雪落千山静\n",
"poprawny wers drugi:\t 冰封万水寒\n",
"\n",
"proponowane drugie wersy: ['人懒几生尘', '清心长保真', '花香不在多', '草木已含英', '松摇古谷风', '鹤踪上潭冰', '兰馨溢神州', '四海奋人心', '贤媳举扇陪', '莫愁女儿红', '春归柳色红', '此味几人同', '寻源路不迷', '头彩出中原', '绿野寄仙踪', '玉律始调阳', '梦舟载月明', '难教白日闲', '慢煲绿豆汤', '智者忍违缘', '真风再发扬', '梅韵贺新年', '一街太平歌', '桃花自美容', '诗带好风吟', '行藏固有期', '大功扫叛臣', '赤水得玄珠', '诗兴不无神', '月分老梅香', '禹甸沐春风', '徒临洗药泉', '一樽欢暂同', '案头月一樽', '英雄是达人', '木栽门内闲', '移山志不忘', '碧柳锁长亭', '风定水无波', '醉后赋离骚', '胡蝶飞南园', '长河没晓天', '三江福寿图', '脉脉万重心', '高处看浮云', '两乡明月心', '高悬不畏风', '牖含遍岭春', '少年是网虫', '豆灯照墨新', '水凉难泡茶', '中华共颂贤臣', '户内美色呈辉', '府藏石铫图', '宛在岱中行', '王府池子深', '夕观沧海云', '酒醉好题诗', '梅迎跃进春', '箫声向远天', '莫向外头看', '思量枕席功夫', '松风如在弦', '行吟必向民心', '这边环境安宁', '羊年事事吉祥', '年年有盼头', '碧浪皱红霞', '山深虎迹踪', '衣兴露脐', '伴梦眠老屋', '民以食为天', '初日临春虚', '惩凶儆效尤', '梅花落我肩', '搴舟破晓风', '衣间不带尘', '人我法皆空', '何防凿壁偷', '艺高大胆人', '花荣上海人', '天地月常圆', '敞襟天地宽', '起宏图', '开光佛自由', '时泰喜黎民', '月轮碾古今', '而今当宝存', '心静自然凉', '山转路无穷', '白日奈我何', '春心蝶最知', '千花夹寺门', '无肉也能行', '夜寂鸟啼空', '江涌古今潮', '尝鲜食鱼羊', '烽火起云间', '塞外朔风寒', '巧拙尚相悬', '中庭松桂姿', '品德讲道德', '秋波我梦吟', '香飘十里风', '莺歌鹧鸪天']\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 105\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 莫漫三槐羡王自\n",
"poprawny wers drugi:\t 须先百忍学张公\n",
"\n",
"proponowane drugie wersy: ['尘烟已远隐青山', '人如无欲意何求', '他乡月好俺思亲', '丰碑万古纪殊勋', '珠圆玉润入口皆甜', '毅力凝成跨海桥', '海深寻秘展雄才', '青山醉向一樽横', '丰年留客足鸡豚', '美德重红幸福门', '万般幻态杳随风', '须先百忍学张公', '柔水月光披野地天穹', '珠帘难掩月多情', '铮铮梅蕾半含春', '莺燕对舞艳阳天', '青山四面纳千流', '闲庭信步哼欢歌', '千般柳絮游子心', '殷殷老叶护花红', '偶观雨燕栖寒檐', '居身常抱玉壶清', '一腔热血死难消', '而今初信及笄年', '红灯素户好风光', '江河自古向东流', '清风两袖带回家', '一湾碧水日如金', '三阳开泰颂廉明', '锦葵昂面为迎光', '看三国志欲何为', '江山犹得助诗豪', '人情世故亦须明', '早将秋韵入诗怀', '千杯浊酒醉恒长', '庖丁自有解牛刀', '扬州十里小红庐', '云本无心醉雨嫣', '心牵雨骤夜归人', '规矩者应晓方圆', '青灯久作故人看', '猴腾广宇绽琼花', '茫然回首奈何桥', '三联书韵醉今生', '风临荷盏窃清香', '豪杰意气傲云天', '璋玉无瑕耀祥光', '花好月圆夜长明', '心宽纳海老夫能', '闲愁起处是红尘', '扬鞭跃马马行空', '然乎者一字乾坤', '且求秋实果一园', '我犹未脱长康痴', '鹰翔蓝宇戏搏云', '甘棠播爱岁流金', '胸中消尽是非心', '篆铭山石荡浮华', '英年奋进惜时光', '春江柳线乱弹琴', '文联叶问斩妖魔', '梅香葱岭缀长虹', '仙人指路点迷津', '万般气象壮龙年', '催开玉蕊艳无边', '清塘浴月鹤逐风', '时急方须济世才', '国展宏图烈士欣', '卷帘烧烛看梅花', '圆缺朗月也浮名', '梧桐叶上得秋声', '五色龙溪抱江流', '仰头天幕挂霓虹', '新朋正续进行时', '一指清凉尽染秋', '小小儿郎立路中', '无田有业不为贫', '一溪柳绿到谁家', '俨然天竺古先生', '西湖乡梦约谁寻', '听竹尤增几许清', '武夷阳羡品俱馨', '堪求五体保安康', '一川杨柳笼和风', '韧节有意杜虚名', '两个老头打秋风', '直抒快意墨千秋', '名享三奇显祖公', '常将劲节负秋霜', '国持德政著宏篇', '阳朔沿水显花荣', '善男信女拜观音', '举杯邀月到凡尘', '兰心未老梦如初', '伤心羁旅断愁肠', '制笙立乐业兴邦', '亭藏绿荫万般幽', '英雄力困也求人', '神驹腾跃吉祥年', '夜灯勤礼塔中仙', '飞鸿远浦一时惊', '相思一点老了谁', '花间酌酒赏蝶飞', '风雨人生鉴知音', '清泉有趣自通融', '山长水远恨重重', '游春岂料梦成真', '年丰人寿沁诗声', '浮舟水面尽飞花', '三令五申还有贪', '霜飞两鬓孔明灯', '重回津渡觅缠绵', '月光如户窥佳人', '得心应手手头宽', '子孙常读未烧书', '小康致富富人间', '情凝大地重如山', '元兴世盛展宏图', '优良业绩绩可观', '长将远景引天边', '相如廉颇璧千钧', '满腔忧愤铸诗魂', '万般殷切候佳音', '身前淡泊莫非尘', '行吟战马啸征尘', '关羽无停觅长兄', '春风惠我也惠人', '小可参禅入几分', '常教翰墨作鼓吹', '荷描夏画日钤章', '澄天月隐星今宵', '空海星辰宇宙流', '小桃几树鸟啼红', '皇城玉阙夕阳斜', '花贴幼子悦童心', '四行热泪洒苍颜', '双琴欣鼓杏花天', '人间重义树新风', '轻舟破浪过千山', '修身松竹有高风', '马舞龙韵续华章', '山影盘龙月钓珠', '风吹杨柳翠还柔', '一丛老竹梦于胸', '时临峻岭采浮云', '关外又开一朵奇葩', '幽梦一帘总是春', '崇廉尚德岛尚书', '钻杆穿地唱欢歌', '专门收拾搞重婚', '松风竹韵多抒情', '再将粉黛沁于宣', '秉公执法树廉风', '当惊阁老好风光', '心中无欲不争春', '尚德定可净人心', '吹牛煮海火收兵', '凝才情血汗磨刀', '梅花傲雪迓长春', '出门去白面书生', '江郎梦里得犹神', '金声玉振展<E68CAF><E5B195>
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
"liczba proponowanych wersów: 286\n",
"\n",
"wynik przyjętej metryki: 0.0034965034965034965\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 到此酒垆应更好\n",
"poprawny wers drugi:\t 问渠嘉树是谁栽\n",
"\n",
"proponowane drugie wersy: ['尘烟已远隐青山', '而今华夏振雄风', '人如无欲意何求', '丰碑万古纪殊勋', '问渠嘉树是谁栽', '珠圆玉润入口皆甜', '毅力凝成跨海桥', '三杯白酒乐成仙', '青山醉向一樽横', '丰年留客足鸡豚', '美德重红幸福门', '珠帘难掩月多情', '莺燕对舞艳阳天', '青山四面纳千流', '雅文相识好文人', '千般柳絮游子心', '殷殷老叶护花红', '满头霜雪和新梅', '一腔热血死难消', '梦离幻境是非多', '江河自古向东流', '一湾碧水日如金', '小楼吹砌玉生寒', '锦葵昂面为迎光', '看三国志欲何为', '闹中取静看擂台', '庖丁自有解牛刀', '扬州十里小红庐', '心牵雨骤夜归人', '青山一座共云闲', '猴腾广宇绽琼花', '茫然回首奈何桥', '花狎春云露搅和', '青山绿水皆低头', '三联书韵醉今生', '更移阳朔七堆山', '豪杰意气傲云天', '璋玉无瑕耀祥光', '花好月圆夜长明', '心宽纳海老夫能', '闲愁起处是红尘', '鸿才立世展鸿图', '然乎者一字乾坤', '青牛出谷李成熟', '鹰翔蓝宇戏搏云', '甘棠播爱岁流金', '篆铭山石荡浮华', '一拍之下就轻生', '弄潮帆影港城新', '莲花亲水意崇廉', '英年奋进惜时光', '放歌音厚是功深', '故人书自日边来', '又何必三日闻香', '文联叶问斩妖魔', '万般气象壮龙年', '催开玉蕊艳无边', '清塘浴月鹤逐风', '圆缺朗月也浮名', '暖香十里软莺声', '闲敲棋子落灯花', '新朋正续进行时', '一指清凉尽染秋', '日移松影过禅床', '无田有业不为贫', '俨然天竺古先生', '西湖乡梦约谁寻', '纵情狂乱毁根基', '韧节有意杜虚名', '两个老头打秋风', '追求亮丽美人图', '腐败必被人民纠', '鼓瑟难得悦美人', '国持德政著宏篇', '阳朔沿水显花荣', '千般爱意眼中留', '好将长铗护黎民', '伤心羁旅断愁肠', '英雄力困也求人', '且由明月洗尘心', '金花覆没白丢盘', '神驹腾跃吉祥年', '翠柳清风伴杏娇', '万家台笠雨声甘', '蛇听燕语颂春光', '相思一点老了谁', '花间酌酒赏蝶飞', '磋砣无奈怨摽梅', '清泉有趣自通融', '山长水远恨重重', '情如水淡话沧桑', '捉刀李白斩斯文', '重回津渡觅缠绵', '不言第一海胸襟', '乌啼古树惹乡愁', '月光如户窥佳人', '雪漫大地秋光失', '新梅雪橇雅幽行', '官居宰相望王侯', '元兴世盛展宏图', '一意孤行不回头', '优良业绩绩可观', '相如廉颇璧千钧', '江水源源发电来', '行吟战马啸征尘', '关羽无停觅长兄', '春风惠我也惠人', '小可参禅入几分', '常教翰墨作鼓吹', '澄天月隐星今宵', '空海星辰宇宙流', '小桃几树鸟啼红', '红旗漫卷息狼烟', '兄弟同吟夜雨陪', '双琴欣鼓杏花天', '人间重义树新风', '轻舟破浪过千山', '汉高祖业耀千秋', '横波一顾白云旁', '修身松竹有高风', '晓霞含愁看早梅', '风吹杨柳翠还柔', '幽梦一帘总是春', '崇廉尚德岛尚书', '钻杆穿地唱欢歌', '专门收拾搞重婚', '松风竹韵多抒情', '当惊阁老好风光', '黄叶飘零比较烦', '尚德定可净人心', '吹牛煮海火收兵', '凝才情血汗磨刀', '梅花傲雪迓长春', '出门去白面书生', '江郎梦里得犹神', '停琴问月正归乡', '文中已现老成心', '何堪心乱雨难读', '迎来信誉达三江', '迎春老树发新芽', '云压水岸浪逐云', '还将歌赋寄相思', '吕布吕蒙常用兵', '相思不减病扶墙', '杜曲幸有桑麻田', '金龙对舞戏中来', '三春经纬织民图', '回家时不见秋鸿', '满园丹桂早飘香', '风吹枫落枫随风', '蔼峰亦寄诗仙情', '江山忧患老英雄', '火牛曾胜敌千军', '沉年古木韵临风', '落蕊黯留一挽香', '度日如年席卷八荒', '是非自古要三思', '寻思流水意如何', '风流人物看今朝', '横窗疏影绽梅花', '千篇一律竞同声', '人心锁锁锁还开',
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
"liczba proponowanych wersów: 250\n",
"\n",
"wynik przyjętej metryki: 0.004\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 漫向楼台寻婉转\n",
"poprawny wers drugi:\t 重回津渡觅缠绵\n",
"\n",
"proponowane drugie wersy: ['尘烟已远隐青山', '人如无欲意何求', '丰碑万古纪殊勋', '德操应效柏和松', '问渠嘉树是谁栽', '珠圆玉润入口皆甜', '海深寻秘展雄才', '丰年留客足鸡豚', '美德重红幸福门', '珠帘难掩月多情', '铮铮梅蕾半含春', '莺燕对舞艳阳天', '雅文相识好文人', '一心二用两头空', '闲庭信步哼欢歌', '千般柳絮游子心', '映日桑榆重晚晴', '殷殷老叶护花红', '偶观雨燕栖寒檐', '满头霜雪和新梅', '一腔热血死难消', '梦离幻境是非多', '而今初信及笄年', '红灯素户好风光', '江河自古向东流', '清风两袖带回家', '酒醉不如伴月眠', '锦葵昂面为迎光', '浮沉历尽许由谁', '看三国志欲何为', '江山犹得助诗豪', '人情世故亦须明', '闹中取静看擂台', '早将秋韵入诗怀', '庖丁自有解牛刀', '扬州十里小红庐', '心牵雨骤夜归人', '与君同作太平人', '青山一座共云闲', '猴腾广宇绽琼花', '言少言多尽美谈', '好留明月九千秋', '茫然回首奈何桥', '青山绿水皆低头', '三联书韵醉今生', '更移阳朔七堆山', '风临荷盏窃清香', '豪杰意气傲云天', '璋玉无瑕耀祥光', '花好月圆夜长明', '心宽纳海老夫能', '闲愁起处是红尘', '鸿才立世展鸿图', '扬鞭跃马马行空', '然乎者一字乾坤', '且求秋实果一园', '南燕离巢北国春', '青牛出谷李成熟', '我犹未脱长康痴', '鹰翔蓝宇戏搏云', '甘棠播爱岁流金', '胸中消尽是非心', '篆铭山石荡浮华', '一拍之下就轻生', '凌云揽月步高科', '弄潮帆影港城新', '莲花亲水意崇廉', '英年奋进惜时光', '放歌音厚是功深', '春江柳线乱弹琴', '故人书自日边来', '文联叶问斩妖魔', '梅香葱岭缀长虹', '学问无穷博古今', '仙人指路点迷津', '催开玉蕊艳无边', '国运弥盛史弥远', '清塘浴月鹤逐风', '卷帘烧烛看梅花', '圆缺朗月也浮名', '大河滚滚尽淘沙', '梧桐叶上得秋声', '五色龙溪抱江流', '闲敲棋子落灯花', '仰头天幕挂霓虹', '新朋正续进行时', '俨然天竺古先生', '武夷阳羡品俱馨', '拨弦弹水月偏题', '纵情狂乱毁根基', '笛声浅扣暗推窗', '一川杨柳笼和风', '韧节有意杜虚名', '两个老头打秋风', '洗出芙蓉九点青', '半空摇晃寻常仁', '追求亮丽美人图', '腐败必被人民纠', '名享三奇显祖公', '鼓瑟难得悦美人', '明月来时渚落霜', '常将劲节负秋霜', '国持德政著宏篇', '阳朔沿水显花荣', '善男信女拜观音', '千般爱意眼中留', '好将长铗护黎民', '兰心未老梦如初', '世间最难得弟兄', '亭藏绿荫万般幽', '英雄力困也求人', '且由明月洗尘心', '金花覆没白丢盘', '薄酒三杯吊芳魂', '普荫全球亿万生', '神驹腾跃吉祥年', '孔圣有才死后尊', '飞鸿远浦一时惊', '往事依然笔底新', '四方称霸一魔方', '相思一点老了谁', '花间酌酒赏蝶飞', '磋砣无奈怨摽梅', '风雨人生鉴知音', '清泉有趣自通融', '山长水远恨重重', '游春岂料梦成真', '年丰人寿沁诗声', '浮舟水面尽飞花', '情如水淡话沧桑', '霜飞两鬓孔明灯', '重回津渡觅缠绵', '不言第一海胸襟', '月光如户窥佳人', '半帘秋梦鸟也酥', '对苑繁华万蕾新', '得心应手手头宽', '子孙常读未烧书', '雪漫大地秋光失', '情凝大地重如山', '常想旁通不对头', '一意孤行不回头', '优良业绩绩可观', '路远始于跬步间', '长将远景引天边', '相如廉颇璧千钧', '满腔忧愤铸诗魂', '万般殷切候佳音', '身前淡泊莫非尘', '江水源源发电来', '行吟战马啸征尘', '关羽无停觅长兄', '春风惠我也惠人', '扁舟轻荡水云长', '常教翰墨作鼓吹', '澄天月隐星今宵', '空海星辰宇宙流', '小桃几树鸟啼红', '花贴幼子悦童心', '红旗漫卷息狼烟', '思量枕席功夫', '双琴欣鼓杏花天', '人<>
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
"liczba proponowanych wersów: 319\n",
"\n",
"wynik przyjętej metryki: 0.003134796238244514\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 晋水育菩提叶叠千层呈瑞气\n",
"poprawny wers drugi:\t 玉兰生妙境花开十里献诗情\n",
"\n",
"proponowane drugie wersy: ['慈心抒自在手慈眼慈甘露慈', '冬雪心明落叶入泥还育春', '情歌依旧仍随秋水染夕阳', '陈家颜割落耳朵颜面才是东家', '千师作赋笔下新村韵有余', '织天织地织出人间一个家', '一心一意书中可造万般星才', '冰天雪地寒鱼破镜钓江翁', '黄山挥笔新春祝酒绘宏图', '穷途哭恸阮籍猖狂独咏怀', '杏林栽福地仁心妙术起沉疴', '十年非忘本学子该当底气足', '台阶通化境佛寺巍巍气韵深', '一僧击暮鼓南无长诵保平安', '有彩有华偶得佳联少雅人', '英灵不昧览兹蹇蹇匪躬愚', '王粲传遣词备悉预须认定大纲', '八方铺锦绣紫燕娇啼羡物华', '春溪赴梦入径带来山外情', '一匕图始皇自有我易水悲歌', '挥洒一身才气令岁月流芳', '寻梅雪岭无畏寒侵自有香', '待儿曹婚娶莼羹鲈脍慷慨知还', '瞧瞧无品文人赊去空格拍马填', '窗移晌午红蕖深处妹撑船', '琼花瑶叶雨浥芙蕖冉冉香', '皖吟风徽歌韵老村美景若诗', '离别时章柳折残山花静待来春', '中华娇子红塔山云烟贵烟', '字成一体大戟长枪跌宕书', '说地谈天妙语千词趣味生', '人言虽可信但防渭水混泾江', '年年七夕望月观星念恋人', '强国兴邦关注三农百业展新猷', '勤习十载几案当知学子心', '兴亡在抱百千年史鉴咏冰心', '梅花千万点报得人间锦绣春', '鲲鹏翔瀚宇激越高昂自在身', '楚山飞楚曲八方唱就楚风淳', '旌旗飞舞千桡激起粤精神', '玉轮升碧海清辉广照出天然', '壮怀逸兴盛世鸿儒聚鹭园', '终日惟杜门蔬食经卷绳床', '只身游燕赵淡泊无定水云舒', '啜甘须忆苦纵登高位犹纳清风', '登高极目从兹俗虑自消沉', '千家纳福转型跨越晋城兴', '嫩竹舒新绿倚遍春风翠袖寒', '心描山水情一枝一叶总关情', '心朝北斗祖国万岁路铺金', '新年缔良缘月圆人寿谱新歌', '平台屹屹出水蛟腾碧浪中', '四十年苦戍曾教瀚海变桑田', '风亦软云亦淡独怜一地月华', '万般思绪华章雅集自陶然', '为环球献瑞沧桑洗礼万年冰', '指告后昆代代永铭国耻激扬', '更漏子蝶恋花千滴满见泪沙流', '党承柱石九州四海举红旗', '剩有渊明趣随宜对秋色持醪', '浓妆淡抹秋暮霜枫写意诗', '春光照大地九州共绘小康图', '心游翰海叹这般风月似醉似痴', '张皇祖道哀丝豪竹别离间', '千秋华夏千秋业更需龙裔担当', '玉兰生妙境花开十里献诗情', '风和牵细浪托盘荷畔捧玉珠', '对对总求工自对需如互对工', '淡烟浮动摇魂湖月对姮娥', '汇九霄圣脉犀江溢彩梦园芳', '太白泼墨天上月云石上诗', '俗子凡胎从来市井最人情', '山留菩境石鼓一悬梦万年', '文辉百载一轮旭日照庭兰', '裁诗月下诗成月下月尤明', '梅影横窗瘦南枝微弄雪精神', '福音云外播心泉涌玉接灵源', '做戏人看戏人戏内戏人看人', '涵秀沐风雨春风化雨润人心', '何须斗气眼下齐心破一曹', '壮大联坛一片云霞灿锦城', '胸怀税务戮力耕耘收税献丹心', '秉公办事牢记四知品自高', '龙狮舞彩八方乐奏颂长春', '锤镰记取红色党旗血染成', '秦有十八子笔墨抒意论春秋', '德宏章贡修文悦礼敦古铄今', '蔺廉有隙终对刎颈死生交', '雪绽一树花漫园寒梅点点香', '动车牵北南绿城煤城双轨接龙', '熄八年烽火侵华历史鉴千秋', '荷叶一池满铺开澄碧坦荡人心', '辉煌禹甸水漾芙蕖万象新', '盼美丽中国收入倍增成就小康']\n",
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
"liczba proponowanych wersów: 94\n",
"\n",
"wynik przyjętej metryki: 0.010638297872340425\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 国运逢春好\n",
"poprawny wers drugi:\t 民心向党红\n",
"\n",
"proponowane drugie wersy: ['人懒几生尘', '清心长保真', '花香不在多', '草木已含英', '松摇古谷风', '鹤踪上潭冰', '兰馨溢神州', '四海奋人心', '珍簟展方床', '贤媳举扇陪', '冰封万水寒', '莫愁女儿红', '春归柳色红', '此味几人同', '寻源路不迷', '头彩出中原', '玉律始调阳', '梦舟载月明', '难教白日闲', '慢煲绿豆汤', '倾城倾国', '智者忍违缘', '真风再发扬', '梅韵贺新年', '一街太平歌', '桃花自美容', '诗带好风吟', '行藏固有期', '大功扫叛臣', '赤水得玄珠', '诗兴不无神', '月分老梅香', '禹甸沐春风', '徒临洗药泉', '一樽欢暂同', '雪厚松袅云', '案头月一樽', '英雄是达人', '木栽门内闲', '庙略久论兵', '重担重担人', '碧柳锁长亭', '风定水无波', '胡蝶飞南园', '世态笑炎凉', '长河没晓天', '风笔绘春秋', '三江福寿图', '脉脉万重心', '高处看浮云', '两乡明月心', '高悬不畏风', '牖含遍岭春', '少年是网虫', '豆灯照墨新', '水凉难泡茶', '中华共颂贤臣', '户内美色呈辉', '府藏石铫图', '池浅韵牵波', '宛在岱中行', '王府池子深', '寺与山争鲜', '夕观沧海云', '眉月静横窗', '酒醉好题诗', '梅迎跃进春', '箫声向远天', '莫向外头看', '思量枕席功夫', '家庭祥和', '松风如在弦', '这边环境安宁', '醉酒吐真情', '年年有盼头', '碧浪皱红霞', '山深虎迹踪', '衣兴露脐', '民心向党红', '伴梦眠老屋', '禅味涤心胸', '民以食为天', '初日临春虚', '恨别鸟惊心', '朝槿散幽香', '惩凶儆效尤', '梅花落我肩', '陕州人杰灵', '鸟语落花山', '搴舟破晓风', '百岭见千娇', '衣间不带尘', '小曲品三春', '人我法皆空', '何防凿壁偷', '艺高大胆人', '花荣上海人', '天地月常圆', '红雨浸黄云', '敞襟天地宽', '开光佛自由', '时泰喜黎民', '月轮碾古今', '而今当宝存', '春入鸟能言', '偏遇有情人', '心静自然凉', '山转路无穷', '白日奈我何', '春心蝶最知', '千花夹寺门', '无肉也能行', '夜寂鸟啼空', '江涌古今潮', '尝鲜食鱼羊', '烽火起云间', '塞外朔风寒', '巧拙尚相悬', '两手作生涯', '中庭松桂姿', '云外一声钟', '品德讲道德', '秋波我梦吟', '香飘十里风', '莺歌鹧鸪天']\n",
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
"liczba proponowanych wersów: 125\n",
"\n",
"wynik przyjętej metryki: 0.008\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 马蹄有韵分平仄\n",
"poprawny wers drugi:\t 公正不阿辩是非\n",
"\n",
"proponowane drugie wersy: ['德操应效柏和松', '举帜遵章共展才', '岩上青藤攀壁升', '毅力凝成跨海桥', '峰平径长难藏景', '后乐先忧范弟昆', '美德重红幸福门', '暮忆三秋雁字长', '烟柳风丝拂岸斜', '竹林满山景隽幽', '悦己悦人悦世间', '阔水滔滔有酒仙', '绕岭风清捧玉珠', '木讷的人难启迪', '野渡闲游一叶舟', '日月同辉光景嫣', '可恨蛮牛不识琴', '酒醉不如伴月眠', '烈日巡山果生香', '画就雾云笔墨香', '笔盖古今三千年', '江山犹得助诗豪', '人情世故亦须明', '克俭尚勤播誉名', '闹中取静看擂台', '几管笛箫奏响春', '起棒还将玉宇清', '规矩者应晓方圆', '猴腾广宇绽琼花', '言少言多尽美谈', '茫然回首奈何桥', '对仄对平对友情', '地阔难及贪欲长', '大漠孤烟古道长', '新颖文章秋水清', '璋玉无瑕耀祥光', '花好月圆夜长明', '国泰民安幸福多', '且求秋实果一园', '鹰翔蓝宇戏搏云', '浅草雷门愧下关', '篆铭山石荡浮华', '弄潮帆影港城新', '英年奋进惜时光', '电力惠民百业兴', '千树争高有健才', '梦里飞花静闻香', '梅香葱岭缀长虹', '国运弥盛史弥远', '时急方须济世才', '国展宏图烈士欣', '碧野连天满目春', '明月清风野菊香', '糊口养家望父滩', '五色龙溪抱江流', '笔点涟漪见水平', '动动脑筋动静无', '不叫俗尘污本真', '仰头天幕挂霓虹', '喜报频传战士家', '竹韵梅香总可人', '一指清凉尽染秋', '日移松影过禅床', '小小儿郎立路中', '听竹尤增几许清', '大丈夫能屈能伸', '武夷阳羡品俱馨', '拨弦弹水月偏题', '洞口经春长薜萝', '莫指云山认故乡', '粉黛淡施十五光', '象郡云烟锁桂梧', '雪融春到春融雪', '造客茅庐得孔明', '名享三奇显祖公', '明月来时渚落霜', '常将劲节负秋霜', '阳朔沿水显花荣', '好将长铗护黎民', '映月二泉人世情', '普荫全球亿万生', '枝上空吹故国风', '孔圣有才死后尊', '翠柳清风伴杏娇', '新庆交封暨缅封', '飞鸿远浦一时惊', '风雨人生鉴知音', '三令五申还有贪', '达业欣成万户楼', '重回津渡觅缠绵', '点水蜻蜓赏绿来', '道士身怀童子功', '步步登高上岳阳', '热血沸腾意若何', '对苑繁华万蕾新', '宝地佛临济世人', '得心应手手头宽', '时论同高尺五天', '一捧廉泉岛外春', '防不胜防贼近身', '知耻明荣胸臆宽', '每觉邻山云最多', '路远始于跬步间', '满腔忧愤铸诗魂', '何堪永夜漏更寒', '纵览清江高士怀', '春风惠我也惠人', '小可参禅入几分', '扁舟轻荡水云长', '常教翰墨作鼓吹', '空海星辰宇宙流', '兄弟同吟夜雨陪', '贞慧何辞驻翠颜', '竹韵真箫彻夜吹', '伟雄心志白鹤相知', '正在柳洲接柳风', '文庙弦音奏凯频', '马舞龙韵续华章', '诗心永驻圣洁泉', '山影盘龙月钓珠', '风吹杨柳翠还柔', '关外又开一朵奇葩', '幽梦一帘总是春', '崇廉尚德岛尚书', '大势所趋水如蓝', '何处箫声断客肠', '皓月两轮水面逢', '世事抛开谁为谁', '仙境田园隐棹声', '掷笔从戎壮士名', '夫再礼让妻再争', '秉公执法树廉风', '剌史同游忆月明', '尚德定可净人心', '凝才情血汗磨刀', '骨头坚硬好八连', '江郎梦里得犹神', '黄金灿灿冷如冰', '四海龙兴艺术潮', '腹有奸谋即兽心', '玉鼎沉香影寂寥', '月转疏枝过女墙', '他日凌云傲世间', '百年盟约好时光', '海上风云浪几何', '杜曲幸有桑麻田', '小鸟放歌岁月甜', '竹下新笋一色鲜', '俯首甘为孺子牛', '落蕊黯留一挽香', '踏雪归来鬓染香', '联内音声欠古风', '谁到篱前问姓名', '马上蓝天宇拓宽', '动地惊天事业昌', '山水相依诗易描', '美丽季节万里春', '流觞逸兴写兰亭', '慷慨悲歌魏晋风', '心情更比落花差', '五井丰碑今日游', '傲物诗文有劲风',
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 228\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 山中习静观朝槿\n",
"poprawny wers drugi:\t 洞口经春长薜萝\n",
"\n",
"proponowane drugie wersy: ['人如无欲意何求', '举帜遵章共展才', '岩上青藤攀壁升', '问渠嘉树是谁栽', '济助家乡晃美名', '草内多藏五步蛇', '病入膏肓有治疗', '后乐先忧范弟昆', '好同蝉窟映三潭', '美德重红幸福门', '疑是瑶台月下逢', '锦绣春归百姓家', '鼓瑟还从曲里来', '雀跃鱼翔谐乐多', '莺燕对舞艳阳天', '雅文相识好文人', '映日桑榆重晚晴', '暮忆三秋雁字长', '烟柳风丝拂岸斜', '竹林满山景隽幽', '山雨不来仍有风', '悦己悦人悦世间', '阔水滔滔有酒仙', '燕子三双戏柳烟', '豪赌毁他上进心', '绕岭风清捧玉珠', '野渡闲游一叶舟', '含露芙蓉醉海棠', '日月同辉光景嫣', '可恨蛮牛不识琴', '酒醉不如伴月眠', '烈日巡山果生香', '画就雾云笔墨香', '悔被浮名牵累多', '坑我此生此袋烟', '笔盖古今三千年', '克俭尚勤播誉名', '闹中取静看擂台', '几管笛箫奏响春', '千树落花别样红', '扬州十里小红庐', '云本无心醉雨嫣', '起棒还将玉宇清', '言少言多尽美谈', '茫然回首奈何桥', '地阔难及贪欲长', '大漠孤烟古道长', '新颖文章秋水清', '璋玉无瑕耀祥光', '花好月圆夜长明', '国泰民安幸福多', '然乎者一字乾坤', '且求秋实果一园', '南燕离巢北国春', '我犹未脱长康痴', '浅草雷门愧下关', '融月新醅慢慢尝', '篆铭山石荡浮华', '弄潮帆影港城新', '翠竹山花恋我归', '千树争高有健才', '梦里飞花静闻香', '学问无穷博古今', '富裕安康福万家', '国运弥盛史弥远', '时急方须济世才', '祭酒干杯国子光', '国展宏图烈士欣', '卷帘烧烛看梅花', '碧野连天满目春', '明月清风野菊香', '糊口养家望父滩', '披甲拳飞对手逃', '五色龙溪抱江流', '淡淡菊香盈袖中', '笔点涟漪见水平', '动动脑筋动静无', '不叫俗尘污本真', '竹韵梅香总可人', '一指清凉尽染秋', '日移松影过禅床', '小小儿郎立路中', '意气风发马晓春', '涛落沙新畔易留', '德雨润开廉洁花', '胜算亦防失误时', '月老三分秋水寒', '听竹尤增几许清', '大丈夫能屈能伸', '武夷阳羡品俱馨', '弹毕雅曲听和声', '人爱人钦人喜欢', '洞口经春长薜萝', '一川杨柳笼和风', '莫指云山认故乡', '韧节有意杜虚名', '两个老头打秋风', '春梦几枝与醉痴', '粉黛淡施十五光', '草木蔫枯晒绿洲', '落日也将暮色描', '象郡云烟锁桂梧', '潋滟江波扑簌风', '人且清心同步行', '造客茅庐得孔明', '名享三奇显祖公', '明月来时渚落霜', '阳朔沿水显花荣', '好将长铗护黎民', '北海波清映日黄', '兰心未老梦如初', '映月二泉人世情', '世间最难得弟兄', '亭藏绿荫万般幽', '唯有读书声最佳', '枝上空吹故国风', '笑问书生君是谁', '孔圣有才死后尊', '翠柳清风伴杏娇', '新庆交封暨缅封', '飞鸿远浦一时惊', '往事依然笔底新', '湖月高低映绿杨', '风过泸州带酒香', '相思一点老了谁', '风雨人生鉴知音', '小觑浮名对酒歌', '浮舟水面尽飞花', '早出晚归皆自然', '翠袖拂空一抹烟', '却为心肝伤脑筋', '三令五申还有贪', '达业欣成万户楼', '点水蜻蜓赏绿来', '不言第一海胸襟', '重义轻财德道深', '道士身怀童子功', '步步登高上岳阳', '热血沸腾意若何', '对苑繁华万蕾新', '宝地佛临济世人', '时论同高尺五天', '知耻明荣胸臆宽', '每觉邻山云最多', '常想旁通不对头', '一意孤行不回头', '烛影摇红步步娇', '路远始于跬步间', '银烛金杯映翠眉', '江水源源发电来', '纵览清江高士怀', '关羽无停觅长兄', '落木落红落寂生', '小可参禅入几分', '九世同居号义门', '常教翰墨作鼓吹', '空海星辰宇宙流', '小桃几树鸟啼红', '皇城玉阙夕阳斜', '兄弟同吟夜雨陪', '思量枕席功夫', '贞慧何辞驻翠颜', '竹韵<E7ABB9>
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
"liczba proponowanych wersów: 324\n",
"\n",
"wynik przyjętej metryki: 0.0030864197530864196\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 园主意拳拳不惜重金寻国宝\n",
"poprawny wers drugi:\t 门生情切切敢捐大义铸心碑\n",
"\n",
"proponowane drugie wersy: ['慈心抒自在手慈眼慈甘露慈', '羊羔跪乳乌鸦哺母且思恩', '陈家颜割落耳朵颜面才是东家', '织天织地织出人间一个家', '一心一意书中可造万般星才', '瑶台丽日扬善弘仁一片天', '满腔诚信长赢福利四时春', '诗联并进渝水巴山起异军', '穷途哭恸阮籍猖狂独咏怀', '杏林栽福地仁心妙术起沉疴', '十年非忘本学子该当底气足', '英灵不昧览兹蹇蹇匪躬愚', '万锋笔健联台宿将舞龙文', '王粲传遣词备悉预须认定大纲', '八方铺锦绣紫燕娇啼羡物华', '一匕图始皇自有我易水悲歌', '待儿曹婚娶莼羹鲈脍慷慨知还', '琼花瑶叶雨浥芙蕖冉冉香', '皖吟风徽歌韵老村美景若诗', '中华娇子红塔山云烟贵烟', '字成一体大戟长枪跌宕书', '人言虽可信但防渭水混泾江', '年年七夕望月观星念恋人', '兴亡在抱百千年史鉴咏冰心', '网间谈情话外有音沁语非', '梅花千万点报得人间锦绣春', '鲲鹏翔瀚宇激越高昂自在身', '江边楼上商女欢讴玉树歌', '玉轮升碧海清辉广照出天然', '壮怀逸兴盛世鸿儒聚鹭园', '只身游燕赵淡泊无定水云舒', '啜甘须忆苦纵登高位犹纳清风', '赤隆扬赤帜九州共庆小康春', '登高极目从兹俗虑自消沉', '云天碧水横练陈江七彩颜', '心描山水情一枝一叶总关情', '心朝北斗祖国万岁路铺金', '纵横三界明察正果自如来', '平台屹屹出水蛟腾碧浪中', '风亦软云亦淡独怜一地月华', '为环球献瑞沧桑洗礼万年冰', '篇篇墨语字字无非寂寞吟', '指告后昆代代永铭国耻激扬', '更漏子蝶恋花千滴满见泪沙流', '剩有渊明趣随宜对秋色持醪', '帆樯蔽日风送筝声多在船', '满园春正好八面和风给力多', '春光照大地九州共绘小康图', '心游翰海叹这般风月似醉似痴', '张皇祖道哀丝豪竹别离间', '新枝染翠嫩柳初舒春色娇', '玉兰生妙境花开十里献诗情', '亭自皇朝建青松擎月可知情', '风和牵细浪托盘荷畔捧玉珠', '欢迎学者此道终须启后人', '城苑真娇育德千秋桃李馨', '对对总求工自对需如互对工', '蟾光初照银桨徐摇万点星', '太白泼墨天上月云石上诗', '东坡曾醉人间天上两婵娟', '山留菩境石鼓一悬梦万年', '归程渺渺涕泪常邀笑梦来', '梅影横窗瘦南枝微弄雪精神', '民生有幸嘣出实心得惠仁', '做戏人看戏人戏内戏人看人', '一琴兼一鹤仰承清献旧家风', '夜立桥上明月不流岁月流', '远镜微镜透镜反光镜常问伯奇', '胸怀税务戮力耕耘收税献丹心', '秉公办事牢记四知品自高', '锤镰记取红色党旗血染成', '秦有十八子笔墨抒意论春秋', '信众安详善念广播皆属真人', '德宏章贡修文悦礼敦古铄今', '蔺廉有隙终对刎颈死生交', '雪绽一树花漫园寒梅点点香', '垂名万古百战功随乃若何', '熄八年烽火侵华历史鉴千秋']\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 78\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"\n",
"--------------------------------------------------\n",
"\n"
]
}
],
"source": [
"# For each sampled first verse, score it against every candidate second verse\n",
"# and propose all candidates the classifier labels 1.\n",
"for indeks_wersu_pierwszego in wybrane_dane_testowe:\n",
"    wers_pierwszy = testowe[\"in\"][indeks_wersu_pierwszego]\n",
"    print(\"wers pierwszy:\\t\\t\", wers_pierwszy)\n",
"    poprawny_wers_drugi = testowe[\"out\"][indeks_wersu_pierwszego]\n",
"    print(\"poprawny wers drugi:\\t\", poprawny_wers_drugi)\n",
"    print()\n",
"\n",
"    reprezentacja_wersu_pierwszego = x_test[indeks_wersu_pierwszego]\n",
"    # Build every (first verse, candidate) input and classify them in ONE\n",
"    # batched predict() call instead of one call per candidate - identical\n",
"    # predictions, far less sklearn call overhead.\n",
"    wejscia_do_MLP = [\n",
"        torch.cat((reprezentacja_wersu_pierwszego, y_test[indeks_wersu_drugiego]))\n",
"        for indeks_wersu_drugiego in range(len(y_test))\n",
"    ]\n",
"    przewidywania = klasyfikator.predict(wejscia_do_MLP)\n",
"    mozliwe_indeksy_wersu_drugiego = [\n",
"        indeks_wersu_drugiego\n",
"        for indeks_wersu_drugiego, przewidywanie in enumerate(przewidywania)\n",
"        if przewidywanie == 1\n",
"    ]\n",
"\n",
"    proponowane_wersy = [testowe[\"out\"][i] for i in mozliwe_indeksy_wersu_drugiego]\n",
"    print(\"proponowane drugie wersy:\", proponowane_wersy)\n",
"    print(\"czy poprawny wers jest pośród proponowanych wersów?:\", poprawny_wers_drugi in proponowane_wersy)\n",
"    print(\"liczba proponowanych wersów:\", len(proponowane_wersy))\n",
"    print()\n",
"\n",
"    print(\"wynik przyjętej metryki:\", jagosz_score(poprawny_wers_drugi, proponowane_wersy))\n",
"    print()\n",
"    print(\"-\"*50)\n",
"    print()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "k4-MITYevK4C"
},
"source": [
"## MLPRegressor\n",
"### Proponuje wszystkie wersy, dla których ocena modelu jest większa niż 0,9."
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "opt_lWIfvK4C",
"outputId": "ebacc9bf-8055-4ce2-a0e6-e659a42867f4"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"wers pierwszy:\t\t 仁义自修君子安我\n",
"poprawny wers drugi:\t 诗礼之教家人利贞\n",
"\n",
"proponowane drugie wersy: []\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 0\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"wyjaśnienie - największe wartości przewidywań\n",
" indeks wartosc\n",
"275 275 0.698\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 雪落千山静\n",
"poprawny wers drugi:\t 冰封万水寒\n",
"\n",
"proponowane drugie wersy: ['思量枕席功夫']\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 1\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 莫漫三槐羡王自\n",
"poprawny wers drugi:\t 须先百忍学张公\n",
"\n",
"proponowane drugie wersy: []\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 0\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"wyjaśnienie - największe wartości przewidywań\n",
" indeks wartosc\n",
"275 275 0.662909\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 到此酒垆应更好\n",
"poprawny wers drugi:\t 问渠嘉树是谁栽\n",
"\n",
"proponowane drugie wersy: []\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 0\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"wyjaśnienie - największe wartości przewidywań\n",
" indeks wartosc\n",
"1317 1317 0.710679\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 漫向楼台寻婉转\n",
"poprawny wers drugi:\t 重回津渡觅缠绵\n",
"\n",
"proponowane drugie wersy: []\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 0\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"wyjaśnienie - największe wartości przewidywań\n",
" indeks wartosc\n",
"275 275 0.754231\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 晋水育菩提叶叠千层呈瑞气\n",
"poprawny wers drugi:\t 玉兰生妙境花开十里献诗情\n",
"\n",
"proponowane drugie wersy: ['龙传人赞华夏名镇迎东风', '陈家颜割落耳朵颜面才是东家', '一心一意书中可造万般星才', '一僧击暮鼓南无长诵保平安', '挥洒一身才气令岁月流芳', '皖吟风徽歌韵老村美景若诗', '人言虽可信但防渭水混泾江', '兴亡在抱百千年史鉴咏冰心', '神州筑梦四方创业业峥嵘', '梅花千万点报得人间锦绣春', '广府古城百花芳草淹春秋', '终日惟杜门蔬食经卷绳床', '只身游燕赵淡泊无定水云舒', '地铁迎春西咸大道正龙吟', '千家纳福转型跨越晋城兴', '踏渭河潮宝鸡好梦咏春风', '嫩竹舒新绿倚遍春风翠袖寒', '心描山水情一枝一叶总关情', '玩木玩瓷玩玉玩核玩转岁月赏岁月数十年华笑世间不懂', '莺鹂鸣柳恰有南风雁早乘', '汇九霄圣脉犀江溢彩梦园芳', '俗子凡胎从来市井最人情', '做戏人看戏人戏内戏人看人', '远镜微镜透镜反光镜常问伯奇', '龙狮舞彩八方乐奏颂长春', '秦有十八子笔墨抒意论春秋', '月月风风叫你顿首献感情']\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 27\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"wyjaśnienie - największe wartości przewidywań\n",
" indeks wartosc\n",
"369 369 1.050771\n",
"876 876 1.034025\n",
"754 754 1.031349\n",
"646 646 1.026354\n",
"1306 1306 1.025188\n",
"1130 1130 1.021680\n",
"1374 1374 1.018667\n",
"48 48 1.016918\n",
"460 460 1.009228\n",
"82 82 1.001561\n",
"744 744 0.984034\n",
"506 506 0.981010\n",
"1466 1466 0.972648\n",
"594 594 0.958953\n",
"16 16 0.957970\n",
"1136 1136 0.956761\n",
"1247 1247 0.955357\n",
"517 517 0.947808\n",
"195 195 0.932003\n",
"639 639 0.930620\n",
"530 530 0.929867\n",
"290 290 0.918470\n",
"687 687 0.908896\n",
"715 715 0.907384\n",
"1062 1062 0.907318\n",
"697 697 0.906565\n",
"1387 1387 0.903108\n",
"608 608 0.899049\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 国运逢春好\n",
"poprawny wers drugi:\t 民心向党红\n",
"\n",
"proponowane drugie wersy: ['思量枕席功夫']\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 1\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 马蹄有韵分平仄\n",
"poprawny wers drugi:\t 公正不阿辩是非\n",
"\n",
"proponowane drugie wersy: []\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 0\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"wyjaśnienie - największe wartości przewidywań\n",
" indeks wartosc\n",
"275 275 0.640859\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 山中习静观朝槿\n",
"poprawny wers drugi:\t 洞口经春长薜萝\n",
"\n",
"proponowane drugie wersy: []\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 0\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"wyjaśnienie - największe wartości przewidywań\n",
" indeks wartosc\n",
"275 275 0.760342\n",
"\n",
"--------------------------------------------------\n",
"\n",
"wers pierwszy:\t\t 园主意拳拳不惜重金寻国宝\n",
"poprawny wers drugi:\t 门生情切切敢捐大义铸心碑\n",
"\n",
"proponowane drugie wersy: ['龙传人赞华夏名镇迎东风', '一僧击暮鼓南无长诵保平安', '皖吟风徽歌韵老村美景若诗', '广府古城百花芳草淹春秋', '龙狮舞彩八方乐奏颂长春']\n",
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
"liczba proponowanych wersów: 5\n",
"\n",
"wynik przyjętej metryki: 0.0\n",
"\n",
"--------------------------------------------------\n",
"\n"
]
}
],
"source": [
"# Same inspection loop for the regressor: propose every candidate whose\n",
"# predicted score is at least 0.9.  Scores are computed in one batched\n",
"# predict() call instead of one call per candidate (identical values).\n",
"for indeks_wersu_pierwszego in wybrane_dane_testowe:\n",
"    wers_pierwszy = testowe[\"in\"][indeks_wersu_pierwszego]\n",
"    print(\"wers pierwszy:\\t\\t\", wers_pierwszy)\n",
"    poprawny_wers_drugi = testowe[\"out\"][indeks_wersu_pierwszego]\n",
"    print(\"poprawny wers drugi:\\t\", poprawny_wers_drugi)\n",
"    print()\n",
"\n",
"    reprezentacja_wersu_pierwszego = x_test[indeks_wersu_pierwszego]\n",
"    wejscia_do_MLP = [\n",
"        torch.cat((reprezentacja_wersu_pierwszego, y_test[indeks_wersu_drugiego]))\n",
"        for indeks_wersu_drugiego in range(len(y_test))\n",
"    ]\n",
"    wartosci_przewidywan_wersu_drugiego = regresor.predict(wejscia_do_MLP)\n",
"\n",
"    pom_df = pandas.DataFrame({\"indeks\":list(range(len(y_test))),\"wartosc\":wartosci_przewidywan_wersu_drugiego})\n",
"    proponowane_wersy = [testowe[\"out\"][i] for i in pom_df[\"indeks\"] if pom_df[\"wartosc\"][i]>=0.9]\n",
"\n",
"    print(\"proponowane drugie wersy:\", proponowane_wersy)\n",
"    print(\"czy poprawny wers jest pośród proponowanych wersów?:\", poprawny_wers_drugi in proponowane_wersy)\n",
"    print(\"liczba proponowanych wersów:\", len(proponowane_wersy))\n",
"    print()\n",
"\n",
"    print(\"wynik przyjętej metryki:\", jagosz_score(poprawny_wers_drugi, proponowane_wersy))\n",
"    # When the proposal list is degenerate (empty or longer than 5), show\n",
"    # the highest predicted scores to explain why.\n",
"    if (len(proponowane_wersy)<1 or len(proponowane_wersy)>5):\n",
"        print(\"wyjaśnienie - największe wartości przewidywań\")\n",
"        print(pom_df.nlargest(len(proponowane_wersy)+1, \"wartosc\"))\n",
"    print()\n",
"    print(\"-\"*50)\n",
"    print()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SK0POtR2vK4D"
},
"source": [
"### Przyjęta metryka dla 5% zbioru testowego (train_test_split z test_size=0.95 pozostawia 5% danych)."
]
},
{
"cell_type": "code",
"execution_count": 133,
"metadata": {
"id": "3s4TfbKsvK4D",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "7176ce6c-cb2d-4dae-870f-79b7fa30ed4d"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.008174386920980926\n"
]
}
],
"source": [
"# Corpus-level metric on a 5% slice of the test set (test_size=0.95 keeps 5%).\n",
"krotki = []\n",
"czesc_zbioru_testowego, _ = train_test_split(x_test,test_size=0.95,random_state=42)\n",
"# NOTE(review): only len(czesc_zbioru_testowego) is used below - the loop\n",
"# evaluates the FIRST len(...) items of x_test, not the shuffled split itself.\n",
"\n",
"for indeks_wersu_pierwszego in range(len(czesc_zbioru_testowego)):\n",
"    poprawny_wers_drugi = testowe[\"out\"][indeks_wersu_pierwszego]\n",
"\n",
"    reprezentacja_wersu_pierwszego = x_test[indeks_wersu_pierwszego]\n",
"    # Score all candidates in one batched predict() call (same values as the\n",
"    # original per-candidate calls, much faster).\n",
"    wejscia_do_MLP = [\n",
"        torch.cat((reprezentacja_wersu_pierwszego, y_test[indeks_wersu_drugiego]))\n",
"        for indeks_wersu_drugiego in range(len(y_test))\n",
"    ]\n",
"    wartosci_przewidywan_wersu_drugiego = regresor.predict(wejscia_do_MLP)\n",
"    proponowane_wersy = [\n",
"        testowe[\"out\"][i]\n",
"        for i in range(len(y_test))\n",
"        if wartosci_przewidywan_wersu_drugiego[i] >= 0.9\n",
"    ]\n",
"\n",
"    krotki.append((poprawny_wers_drugi,proponowane_wersy))\n",
"\n",
"print(jagosz_score_dla_zbioru(krotki))"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wRdNz_BFvK4D"
},
"source": [
"### Średnia metryk dla 5% zbioru testowego."
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"id": "xVbgLOvUvK4E",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "71a3adac-7143-44e1-f363-99719e8a4c8a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0.004512548262548263\n"
]
}
],
"source": [
"# Mean of the per-example metric on the same 5% slice of the test set.\n",
"jagosz_scores=[]\n",
"for indeks_wersu_pierwszego in range(len(czesc_zbioru_testowego)):\n",
"    poprawny_wers_drugi = testowe[\"out\"][indeks_wersu_pierwszego]\n",
"\n",
"    reprezentacja_wersu_pierwszego = x_test[indeks_wersu_pierwszego]\n",
"    # Score all candidates in one batched predict() call (same values as the\n",
"    # original per-candidate calls, much faster).\n",
"    wejscia_do_MLP = [\n",
"        torch.cat((reprezentacja_wersu_pierwszego, y_test[indeks_wersu_drugiego]))\n",
"        for indeks_wersu_drugiego in range(len(y_test))\n",
"    ]\n",
"    wartosci_przewidywan_wersu_drugiego = regresor.predict(wejscia_do_MLP)\n",
"    proponowane_wersy = [\n",
"        testowe[\"out\"][i]\n",
"        for i in range(len(y_test))\n",
"        if wartosci_przewidywan_wersu_drugiego[i] >= 0.9\n",
"    ]\n",
"\n",
"    jagosz_scores.append(jagosz_score(poprawny_wers_drugi,proponowane_wersy))\n",
"\n",
"print(numpy.mean(jagosz_scores))"
]
},
{
"cell_type": "code",
"source": [
"print(len(czesc_zbioru_testowego))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "MynyTHiUFPEU",
"outputId": "d3fd2288-464f-4ad7-d5f3-baac564cdbda"
},
"execution_count": 135,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"74\n"
]
}
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
},
"colab": {
"provenance": [],
"gpuType": "T4"
},
"accelerator": "GPU"
},
"nbformat": 4,
"nbformat_minor": 0
}