2135 lines
156 KiB
Plaintext
2135 lines
156 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 99,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "z8cJbMghvK3k",
|
|
"outputId": "09520694-de64-4046-c2a6-639031aa1a10"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (2.32.3)\n",
|
|
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests) (3.3.2)\n",
|
|
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests) (3.7)\n",
|
|
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests) (2.0.7)\n",
|
|
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests) (2024.6.2)\n",
|
|
"Requirement already satisfied: jieba in /usr/local/lib/python3.10/dist-packages (0.42.1)\n",
|
|
"Requirement already satisfied: pypinyin in /usr/local/lib/python3.10/dist-packages (0.51.0)\n",
|
|
"Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)\n",
|
|
"Looking in indexes: https://download.pytorch.org/whl/cu118\n",
|
|
"Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (2.3.1+cu118)\n",
|
|
"Requirement already satisfied: torchtext in /usr/local/lib/python3.10/dist-packages (0.18.0)\n",
|
|
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch) (3.14.0)\n",
|
|
"Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch) (4.12.1)\n",
|
|
"Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch) (1.12.1)\n",
|
|
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch) (3.3)\n",
|
|
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch) (3.1.4)\n",
|
|
"Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch) (2023.6.0)\n",
|
|
"Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.8.89 in /usr/local/lib/python3.10/dist-packages (from torch) (11.8.89)\n",
|
|
"Requirement already satisfied: nvidia-cuda-runtime-cu11==11.8.89 in /usr/local/lib/python3.10/dist-packages (from torch) (11.8.89)\n",
|
|
"Requirement already satisfied: nvidia-cuda-cupti-cu11==11.8.87 in /usr/local/lib/python3.10/dist-packages (from torch) (11.8.87)\n",
|
|
"Requirement already satisfied: nvidia-cudnn-cu11==8.7.0.84 in /usr/local/lib/python3.10/dist-packages (from torch) (8.7.0.84)\n",
|
|
"Requirement already satisfied: nvidia-cublas-cu11==11.11.3.6 in /usr/local/lib/python3.10/dist-packages (from torch) (11.11.3.6)\n",
|
|
"Requirement already satisfied: nvidia-cufft-cu11==10.9.0.58 in /usr/local/lib/python3.10/dist-packages (from torch) (10.9.0.58)\n",
|
|
"Requirement already satisfied: nvidia-curand-cu11==10.3.0.86 in /usr/local/lib/python3.10/dist-packages (from torch) (10.3.0.86)\n",
|
|
"Requirement already satisfied: nvidia-cusolver-cu11==11.4.1.48 in /usr/local/lib/python3.10/dist-packages (from torch) (11.4.1.48)\n",
|
|
"Requirement already satisfied: nvidia-cusparse-cu11==11.7.5.86 in /usr/local/lib/python3.10/dist-packages (from torch) (11.7.5.86)\n",
|
|
"Requirement already satisfied: nvidia-nccl-cu11==2.20.5 in /usr/local/lib/python3.10/dist-packages (from torch) (2.20.5)\n",
|
|
"Requirement already satisfied: nvidia-nvtx-cu11==11.8.86 in /usr/local/lib/python3.10/dist-packages (from torch) (11.8.86)\n",
|
|
"Requirement already satisfied: triton==2.3.1 in /usr/local/lib/python3.10/dist-packages (from torch) (2.3.1)\n",
|
|
"Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from torchtext) (4.66.4)\n",
|
|
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchtext) (2.32.3)\n",
|
|
"Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchtext) (1.25.2)\n",
|
|
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch) (2.1.5)\n",
|
|
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (3.3.2)\n",
|
|
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (3.7)\n",
|
|
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (2.0.7)\n",
|
|
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchtext) (2024.6.2)\n",
|
|
"Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch) (1.3.0)\n",
|
|
"Requirement already satisfied: chardet in /usr/local/lib/python3.10/dist-packages (5.2.0)\n",
|
|
"Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.41.2)\n",
|
|
"Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.14.0)\n",
|
|
"Requirement already satisfied: huggingface-hub<1.0,>=0.23.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.23.2)\n",
|
|
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n",
|
|
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.0)\n",
|
|
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
|
|
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.5.15)\n",
|
|
"Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n",
|
|
"Requirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.19.1)\n",
|
|
"Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.3)\n",
|
|
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.4)\n",
|
|
"Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (2023.6.0)\n",
|
|
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.23.0->transformers) (4.12.1)\n",
|
|
"Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n",
|
|
"Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.7)\n",
|
|
"Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n",
|
|
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.6.2)\n",
|
|
"Requirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (7.7.1)\n",
|
|
"Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (5.5.6)\n",
|
|
"Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (0.2.0)\n",
|
|
"Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (5.7.1)\n",
|
|
"Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (3.6.6)\n",
|
|
"Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (7.34.0)\n",
|
|
"Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets) (3.0.11)\n",
|
|
"Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets) (8.6.2)\n",
|
|
"Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel>=4.5.1->ipywidgets) (6.3.3)\n",
|
|
"Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (67.7.2)\n",
|
|
"Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (0.19.1)\n",
|
|
"Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (4.4.2)\n",
|
|
"Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (0.7.5)\n",
|
|
"Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (3.0.45)\n",
|
|
"Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (2.16.1)\n",
|
|
"Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (0.2.0)\n",
|
|
"Requirement already satisfied: matplotlib-inline in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (0.1.7)\n",
|
|
"Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=4.0.0->ipywidgets) (4.9.0)\n",
|
|
"Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets) (6.5.5)\n",
|
|
"Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets) (0.8.4)\n",
|
|
"Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (3.1.4)\n",
|
|
"Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (24.0.1)\n",
|
|
"Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (23.1.0)\n",
|
|
"Requirement already satisfied: jupyter-core>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (5.7.2)\n",
|
|
"Collecting jupyter-client (from ipykernel>=4.5.1->ipywidgets)\n",
|
|
" Using cached jupyter_client-7.4.9-py3-none-any.whl (133 kB)\n",
|
|
"Requirement already satisfied: nbformat in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (5.10.4)\n",
|
|
"Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (6.5.4)\n",
|
|
"Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.6.0)\n",
|
|
"Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.8.3)\n",
|
|
"Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.18.1)\n",
|
|
"Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.20.0)\n",
|
|
"Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.1.0)\n",
|
|
"Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (0.4)\n",
|
|
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (2.8.2)\n",
|
|
"Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.10/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets) (0.7.0)\n",
|
|
"Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets) (0.2.13)\n",
|
|
"Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core>=4.6.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (4.2.2)\n",
|
|
"Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.2.4)\n",
|
|
"Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (4.9.4)\n",
|
|
"Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (4.12.3)\n",
|
|
"Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (6.1.0)\n",
|
|
"Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.7.1)\n",
|
|
"Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.3.0)\n",
|
|
"Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2.1.5)\n",
|
|
"Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.8.4)\n",
|
|
"Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.10.0)\n",
|
|
"Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (24.0)\n",
|
|
"Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.5.1)\n",
|
|
"Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.3.0)\n",
|
|
"Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2.19.1)\n",
|
|
"Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (4.19.2)\n",
|
|
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->jupyter-client->ipykernel>=4.5.1->ipywidgets) (1.16.0)\n",
|
|
"Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (21.2.0)\n",
|
|
"Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (23.2.0)\n",
|
|
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2023.12.1)\n",
|
|
"Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.35.1)\n",
|
|
"Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.18.1)\n",
|
|
"Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.24.0)\n",
|
|
"Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.16.0)\n",
|
|
"Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2.5)\n",
|
|
"Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (0.5.1)\n",
|
|
"Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (2.22)\n",
|
|
"Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (3.7.1)\n",
|
|
"Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.8.0)\n",
|
|
"Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (3.7)\n",
|
|
"Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.3.1)\n",
|
|
"Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets) (1.2.1)\n",
|
|
"Installing collected packages: jupyter-client\n",
|
|
" Attempting uninstall: jupyter-client\n",
|
|
" Found existing installation: jupyter_client 8.6.2\n",
|
|
" Uninstalling jupyter_client-8.6.2:\n",
|
|
" Successfully uninstalled jupyter_client-8.6.2\n",
|
|
"Successfully installed jupyter-client-7.4.9\n",
|
|
"Requirement already satisfied: jupyter_core in /usr/local/lib/python3.10/dist-packages (5.7.2)\n",
|
|
"Requirement already satisfied: jupyter_client in /usr/local/lib/python3.10/dist-packages (7.4.9)\n",
|
|
"Collecting jupyter_client\n",
|
|
" Using cached jupyter_client-8.6.2-py3-none-any.whl (105 kB)\n",
|
|
"Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter_core) (4.2.2)\n",
|
|
"Requirement already satisfied: traitlets>=5.3 in /usr/local/lib/python3.10/dist-packages (from jupyter_core) (5.7.1)\n",
|
|
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from jupyter_client) (2.8.2)\n",
|
|
"Requirement already satisfied: pyzmq>=23.0 in /usr/local/lib/python3.10/dist-packages (from jupyter_client) (24.0.1)\n",
|
|
"Requirement already satisfied: tornado>=6.2 in /usr/local/lib/python3.10/dist-packages (from jupyter_client) (6.3.3)\n",
|
|
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->jupyter_client) (1.16.0)\n",
|
|
"Installing collected packages: jupyter_client\n",
|
|
" Attempting uninstall: jupyter_client\n",
|
|
" Found existing installation: jupyter_client 7.4.9\n",
|
|
" Uninstalling jupyter_client-7.4.9:\n",
|
|
" Successfully uninstalled jupyter_client-7.4.9\n",
|
|
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
|
|
"notebook 6.5.5 requires jupyter-client<8,>=5.3.4, but you have jupyter-client 8.6.2 which is incompatible.\u001b[0m\u001b[31m\n",
|
|
"\u001b[0mSuccessfully installed jupyter_client-8.6.2\n",
|
|
"Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.0.3)\n",
|
|
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n",
|
|
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2023.4)\n",
|
|
"Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.1)\n",
|
|
"Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.25.2)\n",
|
|
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
|
|
"\u001b[31mERROR: Could not find a version that satisfies the requirement re (from versions: none)\u001b[0m\u001b[31m\n",
|
|
"\u001b[0m\u001b[31mERROR: No matching distribution found for re\u001b[0m\u001b[31m\n",
|
|
"\u001b[0mRequirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (1.2.2)\n",
|
|
"Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.25.2)\n",
|
|
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.10.1)\n",
|
|
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (1.4.2)\n",
|
|
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn) (3.5.0)\n",
|
|
"Requirement already satisfied: scipy==1.10.1 in /usr/local/lib/python3.10/dist-packages (1.10.1)\n",
|
|
"Requirement already satisfied: numpy<1.27.0,>=1.19.5 in /usr/local/lib/python3.10/dist-packages (from scipy==1.10.1) (1.25.2)\n",
|
|
"Requirement already satisfied: gensim in /usr/local/lib/python3.10/dist-packages (4.3.2)\n",
|
|
"Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.10/dist-packages (from gensim) (1.25.2)\n",
|
|
"Requirement already satisfied: scipy>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from gensim) (1.10.1)\n",
|
|
"Requirement already satisfied: smart-open>=1.8.1 in /usr/local/lib/python3.10/dist-packages (from gensim) (6.4.0)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Colab setup: install the third-party packages used below into the kernel's environment.\n",
|
|
"%pip install --upgrade requests\n",
|
|
"%pip install jieba\n",
|
|
"%pip install pypinyin\n",
|
|
"%pip install -v torch torchtext --index-url https://download.pytorch.org/whl/cu118\n",
|
|
"%pip install chardet\n",
|
|
"%pip install transformers\n",
|
|
"%pip install ipywidgets\n",
|
|
"# NOTE(review): this upgrade moves jupyter_client to 8.x, which pip reports as\n",
|
|
"# incompatible with notebook 6.5.5 (requires jupyter-client<8) - confirm it is still needed.\n",
|
|
"%pip install --upgrade jupyter_core jupyter_client\n",
|
|
"%pip install pandas\n",
|
|
"# `re` ships with the Python standard library, so it is not installed from PyPI\n",
|
|
"# (`%pip install re` only produces \"No matching distribution found for re\").\n",
|
|
"%pip install scikit-learn\n",
|
|
"%pip install scipy==1.10.1\n",
|
|
"%pip install gensim"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 100,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "-fw8K0r8vK3m",
|
|
"outputId": "7c973fa0-05f5-42c6-cb7e-66b8d0500c6b"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"Enabling notebook extension jupyter-js-widgets/extension...\n",
|
|
"Paths used for configuration of notebook: \n",
|
|
" \t/root/.jupyter/nbconfig/notebook.json\n",
|
|
"Paths used for configuration of notebook: \n",
|
|
" \t\n",
|
|
" - Validating: \u001b[32mOK\u001b[0m\n",
|
|
"Paths used for configuration of notebook: \n",
|
|
" \t/root/.jupyter/nbconfig/notebook.json\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!jupyter nbextension enable --py widgetsnbextension\n",
|
|
"import jieba\n",
|
|
"import pypinyin\n",
|
|
"import torch\n",
|
|
"from transformers import AutoTokenizer, AutoModel\n",
|
|
"import pandas\n",
|
|
"import re\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from sklearn.datasets import load_iris\n",
|
|
"import numpy"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 101,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "5EWDs2qIvK3n",
|
|
"outputId": "a0d96d62-9121-43de-9924-5fa9c67c0a6f"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"True\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Print whether CUDA is usable, then select the matching torch device.\n",
|
|
"print(torch.cuda.is_available())\n",
|
|
"if torch.cuda.is_available():\n",
|
|
"    device = torch.device(\"cuda\")\n",
|
|
"else:\n",
|
|
"    device = torch.device(\"cpu\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
|
"# Mount Google Drive inside the Colab runtime at /content/drive.\n",
|
|
"from google.colab import drive\n",
|
|
"\n",
|
|
"drive.mount(\"/content/drive\")"
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "V7yH6_p0xVM4",
|
|
"outputId": "89c42e07-65d0-4e8b-ecfa-8d0c0c2fa7cf"
|
|
},
|
|
"execution_count": 102,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
|
|
]
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "hAfKmY-8vK3o"
|
|
},
|
|
"source": [
|
|
"## Normalizacja wejścia - pozbycie się spacji i znaków innych niż chińskie (interpunkcyjnych).\n",
|
|
"### TODO - przepisać używając słownika znaków chińskich?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 103,
|
|
"metadata": {
|
|
"id": "70WYCTvLvK3p"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def normalizeString(s):\n",
|
|
"    \"\"\"Lower-case ``s``, trim it and strip punctuation.\n",
|
|
"\n",
|
|
"    The ASCII and CJK punctuation characters listed in the patterns below are\n",
|
|
"    deleted; single and double quotes are replaced by a space.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"        s: text to clean (must be a ``str``).\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"        The cleaned text with leading/trailing whitespace removed.\n",
|
|
"    \"\"\"\n",
|
|
"    # lower() - lower-case letters; strip() - drop surrounding whitespace\n",
|
|
"    s = s.lower().strip()\n",
|
|
"    # Each substitution below removes one family of punctuation marks.\n",
|
|
"    s = re.sub(r\"([.!?])\", r\"\", s)\n",
|
|
"    s = re.sub(r\"([,;:-])\", r\"\", s)\n",
|
|
"    s = re.sub(r\"([。,?”“《》·、!:;π…ㄚ])\", r\"\", s)\n",
|
|
"    s = re.sub(r\"([/])\", r\"\", s)\n",
|
|
"    # Quotes become a space rather than vanishing, so the text around them stays separated.\n",
|
|
"    s = re.sub(r\"(['\\\"])\", r\" \", s)\n",
|
|
"    return s.strip()\n",
|
|
"\n",
|
|
"\n",
|
|
"def normalizeChinese(s):\n",
|
|
"    \"\"\"Normalize ``s`` with ``normalizeString`` and remove all whitespace.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"        s: text to clean (must be a ``str``).\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"        The punctuation-free text as one contiguous run of characters.\n",
|
|
"    \"\"\"\n",
|
|
"    # split() without arguments breaks on every run of whitespace, including the\n",
|
|
"    # ideographic space U+3000 and tabs, not only on the ASCII space.\n",
|
|
"    return \"\".join(normalizeString(s).split())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "AzN_U3XuvK3p"
|
|
},
|
|
"source": [
|
|
"## Wczytanie zbioru danych. https://www.kaggle.com/datasets/marquis03/chinese-couplets-dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 104,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "YMFveDsRvK3q",
|
|
"outputId": "0f9142d3-94bb-4bd1-d5aa-ad6c0d506a85"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"腾飞上铁锐意改革谋发展勇当千里马\n",
|
|
"和谐南供安全送电保畅通争做领头羊\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load the input and output text files as single-column frames (columns \"in\" / \"out\").\n",
|
|
"fixed_couplets_in = pandas.read_csv(\"fixed_couplets_in.txt\", sep=\"\\t\", names=[\"in\"], header=None)\n",
|
|
"fixed_couplets_out = pandas.read_csv(\"fixed_couplets_out.txt\", sep=\"\\t\", names=[\"out\"], header=None)\n",
|
|
"\n",
|
|
"# Normalize every row. Comprehensions keep the loop variable local, so `_` (IPython's\n",
|
|
"# last-output variable) is no longer overwritten in the notebook namespace.\n",
|
|
"normalized_fixed_couplets_in = [normalizeChinese(line) for line in fixed_couplets_in[\"in\"]]\n",
|
|
"normalized_fixed_couplets_out = [normalizeChinese(line) for line in fixed_couplets_out[\"out\"]]\n",
|
|
"\n",
|
|
"# Show the first normalized pair as a quick sanity check.\n",
|
|
"print(normalized_fixed_couplets_in[0])\n",
|
|
"print(normalized_fixed_couplets_out[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 105,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "lRWgbshhvK3q",
|
|
"outputId": "633b6e24-3892-48e9-fde1-4e82b4d32e57"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
" in out\n",
|
|
"0 腾飞上铁锐意改革谋发展勇当千里马 和谐南供安全送电保畅通争做领头羊\n",
|
|
"1 风弦未拨心先乱 夜幕已沉梦更闲\n",
|
|
"2 花梦粘于春袖口 莺声溅落柳枝头\n",
|
|
"3 晋世文章昌二陆 魏家词赋重三曹\n",
|
|
"4 一句相思吟岁月 千杯美酒醉风情\n",
|
|
"... ... ...\n",
|
|
"744910 半榻诗书盈陋室 一墙字画靓寒庐\n",
|
|
"744911 借角青山埋姓字 掬壶明月洗尘心\n",
|
|
"744912 苑内尽天姿锦窠仙髻无双艳 亭前多国色金粉紫檀第一香\n",
|
|
"744913 浩淼洞庭极目天为界 安闲钓叟静心孰羡鱼\n",
|
|
"744914 志踏云梯能揽月 坚磨铁棒可成针\n",
|
|
"\n",
|
|
"[744915 rows x 2 columns]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Pair the two normalized lists row by row in one two-column frame.\n",
|
|
"fixed_couplets = pandas.DataFrame({\"in\": normalized_fixed_couplets_in, \"out\": normalized_fixed_couplets_out})\n",
|
|
"print(fixed_couplets)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "BaOkCk66vK3r"
|
|
},
|
|
"source": [
|
|
"### Odrzucenie 99% danych (zostaje 1%) - więcej niż 5% zajmuje całą pamięć i wywala program.\n",
|
|
"### Podział danych na zbiór treningowy i testowy."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 106,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "gZhvQ-9SvK3r",
|
|
"outputId": "516353b4-84df-4b7c-fcd5-4ff7642cf46e"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
" in out\n",
|
|
"567354 宇高炎暑净 秋爽飒风来\n",
|
|
"118920 忧乐关天下 安危系一身\n",
|
|
"738591 一盏相思量寂寞 三分惆怅兑凄凉\n",
|
|
"509346 孝驻锦绣城喜吕梁歌飞春融三晋千秋画 义圆和谐梦看汾河景瑞水起九州万卷诗\n",
|
|
"75388 春临八桂海豚舞 福满九州彩凤飞\n",
|
|
"... ... ...\n",
|
|
"116492 创中华古老文明当同日月齐辉功垂万代 启黎庶鸿蒙草昧是与山河并寿德颂千秋\n",
|
|
"91658 纠缠海角指相思何时作罢 浪迹天涯心倦怠哪处归依\n",
|
|
"101376 特地显英灵化被逢人歌泽渥 配天昭厚德恩深无处不波恬\n",
|
|
"262048 温暖鹅城展翅奋飞中国梦 祥和蛇岁铺春欢庆小康年\n",
|
|
"415192 百业一支歌歌伴和风谐雨唱 九江千古梦梦同朗月艳阳圆\n",
|
|
"\n",
|
|
"[5959 rows x 2 columns]\n",
|
|
" in out\n",
|
|
"274864 林霭渐浓迷古寺 尘烟已远隐青山\n",
|
|
"222320 自古青天匡正义 而今华夏振雄风\n",
|
|
"100260 真心请客就该一五一五 假意为情何必我开我开\n",
|
|
"435928 爱本有心今不见 人如无欲意何求\n",
|
|
"446991 欲抹闲愁实不易 谁将片语问何求\n",
|
|
"... ... ...\n",
|
|
"213030 万象随缘观自在 一心发愿待君归\n",
|
|
"299155 春联妙句动心魄 小院雅风入彩光\n",
|
|
"643294 梅亭吹雪横霜笛 松麓邀云放月筝\n",
|
|
"628861 红似桃花白似雪 绿如李叶亮如霜\n",
|
|
"566605 数字双音分两用 联文对句限孤平\n",
|
|
"\n",
|
|
"[1490 rows x 2 columns]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# The first split keeps 1% of the rows in `male` (the remaining 99% go to `duze`);\n",
|
|
"# the second split divides `male` into 80% training and 20% test rows.\n",
|
|
"male, duze = train_test_split(\n",
|
|
"    fixed_couplets,\n",
|
|
"    test_size=0.99,\n",
|
|
"    random_state=42,\n",
|
|
")\n",
|
|
"treningowe, testowe = train_test_split(\n",
|
|
"    male,\n",
|
|
"    test_size=0.2,\n",
|
|
"    random_state=42,\n",
|
|
")\n",
|
|
"print(treningowe)\n",
|
|
"print(testowe)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "2Ek1LXPjvK3r"
|
|
},
|
|
"source": [
|
|
"### Przywrócenie numeracji od 0."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 107,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "Pe3iTklRvK3r",
|
|
"outputId": "d8378e46-4634-49f0-f6c7-cc415cb2a032"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
" in out\n",
|
|
"0 宇高炎暑净 秋爽飒风来\n",
|
|
"1 忧乐关天下 安危系一身\n",
|
|
"2 一盏相思量寂寞 三分惆怅兑凄凉\n",
|
|
"3 孝驻锦绣城喜吕梁歌飞春融三晋千秋画 义圆和谐梦看汾河景瑞水起九州万卷诗\n",
|
|
"4 春临八桂海豚舞 福满九州彩凤飞\n",
|
|
"... ... ...\n",
|
|
"5954 创中华古老文明当同日月齐辉功垂万代 启黎庶鸿蒙草昧是与山河并寿德颂千秋\n",
|
|
"5955 纠缠海角指相思何时作罢 浪迹天涯心倦怠哪处归依\n",
|
|
"5956 特地显英灵化被逢人歌泽渥 配天昭厚德恩深无处不波恬\n",
|
|
"5957 温暖鹅城展翅奋飞中国梦 祥和蛇岁铺春欢庆小康年\n",
|
|
"5958 百业一支歌歌伴和风谐雨唱 九江千古梦梦同朗月艳阳圆\n",
|
|
"\n",
|
|
"[5959 rows x 2 columns]\n",
|
|
" in out\n",
|
|
"0 林霭渐浓迷古寺 尘烟已远隐青山\n",
|
|
"1 自古青天匡正义 而今华夏振雄风\n",
|
|
"2 真心请客就该一五一五 假意为情何必我开我开\n",
|
|
"3 爱本有心今不见 人如无欲意何求\n",
|
|
"4 欲抹闲愁实不易 谁将片语问何求\n",
|
|
"... ... ...\n",
|
|
"1485 万象随缘观自在 一心发愿待君归\n",
|
|
"1486 春联妙句动心魄 小院雅风入彩光\n",
|
|
"1487 梅亭吹雪横霜笛 松麓邀云放月筝\n",
|
|
"1488 红似桃花白似雪 绿如李叶亮如霜\n",
|
|
"1489 数字双音分两用 联文对句限孤平\n",
|
|
"\n",
|
|
"[1490 rows x 2 columns]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"treningowe = treningowe.reset_index(drop=True)\n",
|
|
"testowe = testowe.reset_index(drop=True)\n",
|
|
"print(treningowe)\n",
|
|
"print(testowe)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "1_w2PXrGvK3s"
|
|
},
|
|
"source": [
|
|
"### Pakiet *pypinyin* przewiduje wymowę pinyin dobrze bez potrzeby używania pakietu *jieba*."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 108,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "5dnmD3wevK3s",
|
|
"outputId": "7b3a7727-f0f5-4657-e3b1-e4760c81144d"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"春临八桂海豚舞\n",
|
|
"[['chun1'], ['lin2'], ['ba1'], ['gui4'], ['hai3'], ['tun2'], ['wu3']]\n",
|
|
"['chun1', 'lin2', 'ba1', 'gui4', 'hai3', 'tun2', 'wu3']\n",
|
|
"['春临', '八桂', '海豚', '舞']\n",
|
|
"[['chun1'], ['lin2'], ['ba1'], ['gui4'], ['hai3'], ['tun2'], ['wu3']]\n",
|
|
"['chun1', 'lin2', 'ba1', 'gui4', 'hai3', 'tun2', 'wu3']\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from pypinyin import pinyin, lazy_pinyin, Style\n",
|
|
"\n",
|
|
"zdanie = treningowe[\"in\"][4]\n",
|
|
"print(zdanie)\n",
|
|
"print(pinyin(zdanie, style=Style.TONE3, neutral_tone_with_five=True))\n",
|
|
"print(lazy_pinyin(zdanie, style=Style.TONE3, neutral_tone_with_five=True))\n",
|
|
"\n",
|
|
"slowa = list(jieba.cut(zdanie))\n",
|
|
"print(slowa)\n",
|
|
"print(pinyin(slowa, style=Style.TONE3, neutral_tone_with_five=True))\n",
|
|
"print(lazy_pinyin(slowa, style=Style.TONE3, neutral_tone_with_five=True))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "LghjKuLGvK3s"
|
|
},
|
|
"source": [
|
|
"## Podział wymowy pinyin na początki (initials), końcówki (finals) i tony.\n",
|
|
"### Zamina w liczby przy pomocy słownika."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 109,
|
|
"metadata": {
|
|
"id": "xj0SAPGsvK3s"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pypinyin.contrib.tone_convert import to_finals, to_initials\n",
|
|
"# 声母表\n",
|
|
"_INITIALS=['b','p','m','f','d','t','n','l','g','k','h','j','q','x','zh','ch','sh','r','z','c','s',]\n",
|
|
"# 声母表,把 y,w 也当作声母\n",
|
|
"_INITIALS_NOT_STRICT=_INITIALS+['y','w']\n",
|
|
"# 韵母表\n",
|
|
"_FINALS=['i','u','ü','a','ia','ua','o','uo','e','ie','üe','ai','uai','ei','uei','ao','iao','ou','iou','an','ian','uan','üan','en','in','uen','ün','ang','iang','uang','eng','ing','ueng','ong','iong','er','ê',]\n",
|
|
"\n",
|
|
"slownik_initials = {}\n",
|
|
"licznik = 1\n",
|
|
"for indeks_wersu_pierwszego in _INITIALS+[\"\"]:\n",
|
|
" slownik_initials[indeks_wersu_pierwszego] = licznik\n",
|
|
" licznik+=1\n",
|
|
"\n",
|
|
"slownik_finals = {}\n",
|
|
"licznik = 1\n",
|
|
"for indeks_wersu_pierwszego in _FINALS+[\"\"]:\n",
|
|
" slownik_finals[indeks_wersu_pierwszego] = licznik\n",
|
|
" licznik+=1\n",
|
|
"\n",
|
|
"def poczatek_koniec_ton(zapis_pinyin_3):\n",
|
|
" poczatek = slownik_initials[to_initials(zapis_pinyin_3)]\n",
|
|
" koniec = slownik_finals[to_finals(zapis_pinyin_3).replace('v', 'ü')]\n",
|
|
" ton = int(zapis_pinyin_3[-1])\n",
|
|
" return poczatek, koniec, ton\n",
|
|
"\n",
|
|
"def wymowy_i_tony_zdania(zdanie):\n",
|
|
" zapis_pinyin_3_zdania = lazy_pinyin(zdanie, style=Style.TONE3, neutral_tone_with_five=True)\n",
|
|
" poczatki = []\n",
|
|
" konce =[]\n",
|
|
" tony = []\n",
|
|
" # print(zdanie, zapis_pinyin_3_zdania)\n",
|
|
" for zp3 in zapis_pinyin_3_zdania:\n",
|
|
" p,k,t = poczatek_koniec_ton(zp3)\n",
|
|
" poczatki.append(p)\n",
|
|
" konce.append(k)\n",
|
|
" tony.append(t)\n",
|
|
" return poczatki, konce, tony\n",
|
|
"\n",
|
|
"def dopasuj_dlugosc_wektora(wektor, dlugosc_wektora):\n",
|
|
" if len(wektor)>dlugosc_wektora:\n",
|
|
" wynik = wektor[:dlugosc_wektora]\n",
|
|
" else:\n",
|
|
" wynik = numpy.pad(wektor,(0,dlugosc_wektora-len(wektor)), mode='constant', constant_values=0)\n",
|
|
" return wynik"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 110,
|
|
"metadata": {
|
|
"id": "s4NN04ZpvK3t"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def poczatki_konce_tony_dla_zdan(zdania, liczba_wejscia):\n",
|
|
" poczatki_wyn = []\n",
|
|
" konce_wyn = []\n",
|
|
" tony_wyn = []\n",
|
|
"\n",
|
|
" for zdanie in zdania:\n",
|
|
" poczatki, konce, tony = wymowy_i_tony_zdania(zdanie)\n",
|
|
"\n",
|
|
" poczatki = dopasuj_dlugosc_wektora(poczatki, liczba_wejscia)\n",
|
|
" konce = dopasuj_dlugosc_wektora(konce, liczba_wejscia)\n",
|
|
" tony = dopasuj_dlugosc_wektora(tony, liczba_wejscia)\n",
|
|
"\n",
|
|
" poczatki_wyn.append(poczatki)\n",
|
|
" konce_wyn.append(konce)\n",
|
|
" tony_wyn.append(tony)\n",
|
|
"\n",
|
|
" return poczatki_wyn, konce_wyn, tony_wyn"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "fyj5LRN3vK3t"
|
|
},
|
|
"source": [
|
|
"### Początki, końcówki i tony wierszy treningowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 111,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "vvooWU0hvK3t",
|
|
"outputId": "e08da1e6-3955-4e7b-fcda-a4a75231fec9"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"5959\n",
|
|
"宇高炎暑净\n",
|
|
"['yu3', 'gao1', 'yan2', 'shu3', 'jing4']\n",
|
|
"秋爽飒风来\n",
|
|
"['qiu1', 'shuang3', 'sa4', 'feng1', 'lai2']\n",
|
|
"5959\n",
|
|
"[22 9 22 17 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
" 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[ 3 16 21 2 32 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
" 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[3 1 2 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[13 17 21 4 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
" 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[19 30 4 31 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
" 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[1 3 4 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(len(treningowe[\"in\"]))\n",
|
|
"print(treningowe[\"in\"][0])\n",
|
|
"print(lazy_pinyin(treningowe[\"in\"][0], style=Style.TONE3, neutral_tone_with_five=True))\n",
|
|
"print(treningowe[\"out\"][0])\n",
|
|
"print(lazy_pinyin(treningowe[\"out\"][0], style=Style.TONE3, neutral_tone_with_five=True))\n",
|
|
"\n",
|
|
"liczba_wejscia = 35\n",
|
|
"\n",
|
|
"poczatki_treningowe_in, konce_treningowe_in, tony_treningowe_in = poczatki_konce_tony_dla_zdan(treningowe[\"in\"], liczba_wejscia)\n",
|
|
"poczatki_treningowe_out, konce_treningowe_out, tony_treningowe_out = poczatki_konce_tony_dla_zdan(treningowe[\"out\"], liczba_wejscia)\n",
|
|
"\n",
|
|
"print(len(poczatki_treningowe_in))\n",
|
|
"print(poczatki_treningowe_in[0])\n",
|
|
"print(konce_treningowe_in[0])\n",
|
|
"print(tony_treningowe_in[0])\n",
|
|
"print(poczatki_treningowe_out[0])\n",
|
|
"print(konce_treningowe_out[0])\n",
|
|
"print(tony_treningowe_out[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "ZQd6eh5bvK3u"
|
|
},
|
|
"source": [
|
|
"## Zanurzenia BAAI wierszy treningowych. https://huggingface.co/BAAI/bge-small-zh-v1.5"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 112,
|
|
"metadata": {
|
|
"id": "61vGtuyBvK3u"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# # Load model from HuggingFace Hub\n",
|
|
"# tokenizer = AutoTokenizer.from_pretrained(\"BAAI/bge-small-zh-v1.5\")\n",
|
|
"# model = AutoModel.from_pretrained(\"BAAI/bge-small-zh-v1.5\")\n",
|
|
"# model.eval()\n",
|
|
"\n",
|
|
"# def zanurzenia_zdan(lista_zdan):\n",
|
|
"# # Sentences we want sentence embeddings for\n",
|
|
"# #sentences = [\"样例数据-1样例数据\", \"样例数据-2样例数据\"]\n",
|
|
"# sentences = lista_zdan\n",
|
|
"\n",
|
|
"# # Tokenize sentences\n",
|
|
"# encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')\n",
|
|
"# # for s2p(short query to long passage) retrieval task, add an instruction to query (not add instruction for passages)\n",
|
|
"# # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')\n",
|
|
"\n",
|
|
"# # Compute token embeddings\n",
|
|
"# with torch.no_grad():\n",
|
|
"# model_output = model(**encoded_input)\n",
|
|
"# # Perform pooling. In this case, cls pooling.\n",
|
|
"# sentence_embeddings = model_output[0][:, 0]\n",
|
|
"# # normalize embeddings\n",
|
|
"# sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)\n",
|
|
"# # print(\"Sentence embeddings shape:\", sentence_embeddings.shape)\n",
|
|
"# # print(\"Sentence embeddings:\", sentence_embeddings)\n",
|
|
"\n",
|
|
"# return sentence_embeddings\n",
|
|
"\n",
|
|
"# def zanurzenie_zdania(zdanie):\n",
|
|
"# # Tokenize sentences\n",
|
|
"# encoded_input = tokenizer(zdanie, padding=True, truncation=True, return_tensors='pt')\n",
|
|
"# # for s2p(short query to long passage) retrieval task, add an instruction to query (not add instruction for passages)\n",
|
|
"# # encoded_input = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')\n",
|
|
"\n",
|
|
"# # Compute token embeddings\n",
|
|
"# with torch.no_grad():\n",
|
|
"# model_output = model(**encoded_input)\n",
|
|
"# # Perform pooling. In this case, cls pooling.\n",
|
|
"# sentence_embedding = model_output[0][:, 0]\n",
|
|
"# # normalize embeddings\n",
|
|
"# sentence_embedding = torch.nn.functional.normalize(sentence_embedding, p=2, dim=1)\n",
|
|
"\n",
|
|
"# return sentence_embedding"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 113,
|
|
"metadata": {
|
|
"id": "4gt7oqi4vK3u"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# treningowe_in_lista = treningowe[\"in\"].tolist()\n",
|
|
"# treningowe_out_lista = treningowe[\"out\"].tolist()\n",
|
|
"\n",
|
|
"# print(len(treningowe_in_lista))\n",
|
|
"# print(treningowe_in_lista[0])\n",
|
|
"# print(treningowe_out_lista[0])\n",
|
|
"\n",
|
|
"# zanurzenia_treningowe_in = zanurzenia_zdan(treningowe_in_lista)\n",
|
|
"# zanurzenia_treningowe_out = zanurzenia_zdan(treningowe_out_lista)\n",
|
|
"\n",
|
|
"# print(zanurzenia_treningowe_in.shape)\n",
|
|
"# print(zanurzenia_treningowe_in[0])\n",
|
|
"# print(zanurzenia_treningowe_out[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "TAfBMDqlvK3u"
|
|
},
|
|
"source": [
|
|
"### Tensory - reprezentacje pierwszych wersów wierszy treningowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 114,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "hV9h84mrvK3v",
|
|
"outputId": "0fbd174d-e6f3-4a08-892e-0de2ac838f99"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"5959\n",
|
|
"torch.Size([109])\n",
|
|
"tensor([22, 9, 22, 17, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,\n",
|
|
" 16, 21, 2, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1,\n",
|
|
" 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 12, 32,\n",
|
|
" 4])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"x = []\n",
|
|
"for indeks_wersu_pierwszego in range(len(poczatki_treningowe_in)):\n",
|
|
" poczatki = poczatki_treningowe_in\n",
|
|
" konce = konce_treningowe_in\n",
|
|
" tony = tony_treningowe_in\n",
|
|
" niezerowe_poczatki = [p for p in poczatki[indeks_wersu_pierwszego] if p>0.0]\n",
|
|
" poczatek_ostatniego_znaku = niezerowe_poczatki[len(niezerowe_poczatki)-1]\n",
|
|
" koniec_ostatniego_znaku = konce[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
|
|
" ton_ostatniego_znaku = tony[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
|
|
" x.append(torch.cat(\n",
|
|
" (\n",
|
|
" # zanurzenia_treningowe_in[indeks_wersu_pierwszego],\n",
|
|
" torch.from_numpy(poczatki_treningowe_in[indeks_wersu_pierwszego]),\n",
|
|
" torch.from_numpy(konce_treningowe_in[indeks_wersu_pierwszego]),\n",
|
|
" torch.from_numpy(tony_treningowe_in[indeks_wersu_pierwszego]),\n",
|
|
" torch.from_numpy(numpy.array([len(niezerowe_poczatki)])),\n",
|
|
" torch.from_numpy(numpy.array([poczatek_ostatniego_znaku])),\n",
|
|
" torch.from_numpy(numpy.array([koniec_ostatniego_znaku])),\n",
|
|
" torch.from_numpy(numpy.array([ton_ostatniego_znaku]))\n",
|
|
" )\n",
|
|
" ))\n",
|
|
"print(len(x))\n",
|
|
"print(x[0].shape)\n",
|
|
"print(x[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "wkLMwhjevK3v"
|
|
},
|
|
"source": [
|
|
"### Tensory - reprezentacje drugich wersów wierszy treningowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 115,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "PqKvzCJavK3v",
|
|
"outputId": "dbfad29e-8312-49ea-a2fe-891604f8092a"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"5959\n",
|
|
"torch.Size([109])\n",
|
|
"tensor([13, 17, 21, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19,\n",
|
|
" 30, 4, 31, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3,\n",
|
|
" 4, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8, 12,\n",
|
|
" 2])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"y = []\n",
|
|
"for indeks_wersu_pierwszego in range(len(poczatki_treningowe_out)):\n",
|
|
" poczatki = poczatki_treningowe_out\n",
|
|
" konce = konce_treningowe_out\n",
|
|
" tony = tony_treningowe_out\n",
|
|
" niezerowe_poczatki = [p for p in poczatki[indeks_wersu_pierwszego] if p>0.0]\n",
|
|
" poczatek_ostatniego_znaku = niezerowe_poczatki[len(niezerowe_poczatki)-1]\n",
|
|
" koniec_ostatniego_znaku = konce[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
|
|
" ton_ostatniego_znaku = tony[indeks_wersu_pierwszego][len(niezerowe_poczatki)-1]\n",
|
|
" y.append(\n",
|
|
" torch.cat(\n",
|
|
" (\n",
|
|
" # zanurzenia_treningowe_out[indeks_wersu_pierwszego],\n",
|
|
" torch.from_numpy(poczatki_treningowe_out[indeks_wersu_pierwszego]),\n",
|
|
" torch.from_numpy(konce_treningowe_out[indeks_wersu_pierwszego]),\n",
|
|
" torch.from_numpy(tony_treningowe_out[indeks_wersu_pierwszego]),\n",
|
|
" torch.from_numpy(numpy.array([len(niezerowe_poczatki)])),\n",
|
|
" torch.from_numpy(numpy.array([poczatek_ostatniego_znaku])),\n",
|
|
" torch.from_numpy(numpy.array([koniec_ostatniego_znaku])),\n",
|
|
" torch.from_numpy(numpy.array([ton_ostatniego_znaku]))\n",
|
|
" )\n",
|
|
" )\n",
|
|
" )\n",
|
|
"print(len(y))\n",
|
|
"print(y[0].shape)\n",
|
|
"print(y[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "-8hQv7y0vK3w"
|
|
},
|
|
"source": [
|
|
"## Wejście do sieci neuronowej.\n",
|
|
"### Odpowiadające sobie wersy i kilka losowo dobranych nieodpowiadających sobie wersów."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 116,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "G_zK4KIRvK3w",
|
|
"outputId": "7966a2bc-b13e-4ac1-a0d4-ae6b7a51c181"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"29795\n",
|
|
"tensor([22, 9, 22, 17, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,\n",
|
|
" 16, 21, 2, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 1,\n",
|
|
" 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 12, 32,\n",
|
|
" 4, 13, 17, 21, 4, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 19, 30, 4, 31, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,\n",
|
|
" 3, 4, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 8,\n",
|
|
" 12, 2])\n",
|
|
"29795\n",
|
|
"1\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from random import sample\n",
|
|
"X = []\n",
|
|
"Y = []\n",
|
|
"for indeks_wersu_drugiego in range(len(x)):\n",
|
|
" indeksy = sample(range(len(y)), 5)\n",
|
|
" if indeks_wersu_drugiego not in indeksy:\n",
|
|
" indeksy[0] = indeks_wersu_drugiego\n",
|
|
" for k in indeksy:\n",
|
|
" X.append(\n",
|
|
" torch.cat(\n",
|
|
" (x[indeks_wersu_drugiego], y[k])\n",
|
|
" )\n",
|
|
" )\n",
|
|
" if indeks_wersu_drugiego==k:\n",
|
|
" Y.append(1)\n",
|
|
" else:\n",
|
|
" Y.append(0)\n",
|
|
"\n",
|
|
"print(len(X))\n",
|
|
"print(X[0])\n",
|
|
"print(len(Y))\n",
|
|
"print(Y[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "4i_4--zBvK3w"
|
|
},
|
|
"source": [
|
|
"## Modele sklearn."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 117,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 74
|
|
},
|
|
"id": "WuDfk70LvK3w",
|
|
"outputId": "35065e9c-8c6b-4837-8018-49cc504f0d90"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
"MLPClassifier()"
|
|
],
|
|
"text/html": [
|
|
"<style>#sk-container-id-7 {color: black;background-color: white;}#sk-container-id-7 pre{padding: 0;}#sk-container-id-7 div.sk-toggleable {background-color: white;}#sk-container-id-7 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-7 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-7 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-7 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-7 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-7 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-7 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-7 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-7 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-7 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-7 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-7 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-7 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-7 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-7 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-7 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-7 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-7 div.sk-item {position: relative;z-index: 1;}#sk-container-id-7 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-7 div.sk-item::before, #sk-container-id-7 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-7 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-7 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-7 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-7 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-7 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-7 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-7 div.sk-label-container {text-align: center;}#sk-container-id-7 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-7 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-7\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MLPClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" checked><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MLPClassifier</label><div class=\"sk-toggleable__content\"><pre>MLPClassifier()</pre></div></div></div></div></div>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 117
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.neural_network import MLPClassifier\n",
|
|
"klasyfikator = MLPClassifier() # activation=\"tanh\"\n",
|
|
"\n",
|
|
"klasyfikator.fit(X, Y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 118,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/",
|
|
"height": 74
|
|
},
|
|
"id": "VpAcpuprvK3x",
|
|
"outputId": "8306fb43-7d24-4b52-f1bd-e0373da43f42"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "execute_result",
|
|
"data": {
|
|
"text/plain": [
|
|
"MLPRegressor()"
|
|
],
|
|
"text/html": [
|
|
"<style>#sk-container-id-8 {color: black;background-color: white;}#sk-container-id-8 pre{padding: 0;}#sk-container-id-8 div.sk-toggleable {background-color: white;}#sk-container-id-8 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-8 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-8 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-8 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-8 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-8 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-8 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-8 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-8 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-8 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-8 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-8 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-8 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-8 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-8 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-8 div.sk-item {position: relative;z-index: 1;}#sk-container-id-8 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-8 div.sk-item::before, #sk-container-id-8 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-8 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-8 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-8 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-8 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-8 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-8 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-8 div.sk-label-container {text-align: center;}#sk-container-id-8 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-8 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-8\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>MLPRegressor()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" checked><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">MLPRegressor</label><div class=\"sk-toggleable__content\"><pre>MLPRegressor()</pre></div></div></div></div></div>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"execution_count": 118
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.neural_network import MLPRegressor\n",
|
|
"regresor = MLPRegressor() # activation=\"tanh\"\n",
|
|
"\n",
|
|
"regresor.fit(X, Y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "Q1RYkn4YvK3x"
|
|
},
|
|
"source": [
|
|
"### Początki, końcówki i tony wierszy testowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 119,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "HUtENh9bvK3x",
|
|
"outputId": "1cd28e90-bf60-42b5-86a6-9f94548a40e7"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"1490\n",
|
|
"林霭渐浓迷古寺\n",
|
|
"尘烟已远隐青山\n",
|
|
"1490\n",
|
|
"[ 8 22 12 7 3 9 21 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
" 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[25 12 21 34 1 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
" 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[2 3 4 2 2 3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[16 22 22 22 22 13 17 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
" 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[24 21 1 23 25 32 20 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
|
|
" 0 0 0 0 0 0 0 0 0 0 0]\n",
|
|
"[2 1 3 3 3 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(len(testowe[\"in\"]))\n",
|
|
"print(testowe[\"in\"][0])\n",
|
|
"print(testowe[\"out\"][0])\n",
|
|
"\n",
|
|
"liczba_wejscia = 35\n",
|
|
"\n",
|
|
"poczatki_testowe_in, konce_testowe_in, tony_testowe_in = poczatki_konce_tony_dla_zdan(testowe[\"in\"], liczba_wejscia)\n",
|
|
"poczatki_testowe_out, konce_testowe_out, tony_testowe_out = poczatki_konce_tony_dla_zdan(testowe[\"out\"], liczba_wejscia)\n",
|
|
"\n",
|
|
"print(len(poczatki_testowe_in))\n",
|
|
"print(poczatki_testowe_in[0])\n",
|
|
"print(konce_testowe_in[0])\n",
|
|
"print(tony_testowe_in[0])\n",
|
|
"print(poczatki_testowe_out[0])\n",
|
|
"print(konce_testowe_out[0])\n",
|
|
"print(tony_testowe_out[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "JVmApwk1vK3x"
|
|
},
|
|
"source": [
|
|
"### Zanurzenia BAAI wierszy testowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 120,
|
|
"metadata": {
|
|
"id": "f0564iMSvK3x"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# testowe_in_lista = testowe[\"in\"].tolist()\n",
|
|
"# testowe_out_lista = testowe[\"out\"].tolist()\n",
|
|
"\n",
|
|
"# print(len(testowe_in_lista))\n",
|
|
"# print(testowe_in_lista[0])\n",
|
|
"# print(testowe_out_lista[0])\n",
|
|
"\n",
|
|
"# zanurzenia_testowe_in = zanurzenia_zdan(testowe_in_lista)\n",
|
|
"# zanurzenia_testowe_out = zanurzenia_zdan(testowe_out_lista)\n",
|
|
"\n",
|
|
"# print(zanurzenia_testowe_in.shape)\n",
|
|
"# print(zanurzenia_testowe_in[0])\n",
|
|
"# print(zanurzenia_testowe_out[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "4G8ZrHPJvK3y"
|
|
},
|
|
"source": [
|
|
"### Tensory - reprezentacje pierwszych wersów wierszy testowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 121,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "oy4-_mVxvK3y",
|
|
"outputId": "132585d7-5fb1-42b1-8127-e31febec14c8"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"1490\n",
|
|
"torch.Size([109])\n",
|
|
"tensor([ 8, 22, 12, 7, 3, 9, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25,\n",
|
|
" 12, 21, 34, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,\n",
|
|
" 4, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 21, 1,\n",
|
|
" 4])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Build one feature tensor per first verse of the test couplets.\n",
|
|
"# Layout: start codes, end codes and tone codes of the verse, followed by four scalars:\n",
|
|
"# the number of non-zero start codes and the start / end / tone code of the last character.\n",
|
|
"# NOTE(review): the codes are presumably pinyin initial / final / tone ids - confirm against the encoding cell.\n",
|
|
"# The BAAI sentence embeddings (zanurzenia_testowe_in) are currently left out of the vector.\n",
|
|
"x_test = []\n",
|
|
"poczatki, konce, tony = poczatki_testowe_in, konce_testowe_in, tony_testowe_in\n",
|
|
"for indeks_wersu_pierwszego in range(len(poczatki)):\n",
|
|
"  # Only non-zero start codes are counted; zeros look like padding in the printed tensors.\n",
|
|
"  niezerowe_poczatki = [p for p in poczatki[indeks_wersu_pierwszego] if p>0.0]\n",
|
|
"  indeks_ostatniego_znaku = len(niezerowe_poczatki)-1\n",
|
|
"  poczatek_ostatniego_znaku = niezerowe_poczatki[indeks_ostatniego_znaku]\n",
|
|
"  koniec_ostatniego_znaku = konce[indeks_wersu_pierwszego][indeks_ostatniego_znaku]\n",
|
|
"  ton_ostatniego_znaku = tony[indeks_wersu_pierwszego][indeks_ostatniego_znaku]\n",
|
|
"  cechy_sekwencyjne = [\n",
|
|
"    torch.from_numpy(kody[indeks_wersu_pierwszego]) for kody in (poczatki, konce, tony)\n",
|
|
"  ]\n",
|
|
"  cechy_skalarne = [\n",
|
|
"    torch.from_numpy(numpy.array([wartosc]))\n",
|
|
"    for wartosc in (len(niezerowe_poczatki), poczatek_ostatniego_znaku, koniec_ostatniego_znaku, ton_ostatniego_znaku)\n",
|
|
"  ]\n",
|
|
"  x_test.append(torch.cat(cechy_sekwencyjne + cechy_skalarne))\n",
|
|
"print(len(x_test))\n",
|
|
"print(x_test[0].shape)\n",
|
|
"print(x_test[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "kKf0Ob0CvK3y"
|
|
},
|
|
"source": [
|
|
"### Tensory - reprezentacje drugich wersów wierszy testowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 122,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "khi6U9dlvK3y",
|
|
"outputId": "3b1a4673-1af2-4832-8138-fa7d99139f76"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"1490\n",
|
|
"torch.Size([109])\n",
|
|
"tensor([16, 22, 22, 22, 22, 13, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24,\n",
|
|
" 21, 1, 23, 25, 32, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1,\n",
|
|
" 3, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 17, 20,\n",
|
|
" 1])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Build one feature tensor per second verse of the test couplets.\n",
|
|
"# Layout: start codes, end codes and tone codes of the verse, followed by four scalars:\n",
|
|
"# the number of non-zero start codes and the start / end / tone code of the last character.\n",
|
|
"# The BAAI sentence embeddings (zanurzenia_testowe_out) are currently left out of the vector.\n",
|
|
"y_test = []\n",
|
|
"poczatki, konce, tony = poczatki_testowe_out, konce_testowe_out, tony_testowe_out\n",
|
|
"for indeks_wersu_pierwszego in range(len(poczatki)):\n",
|
|
"    # Only non-zero start codes are counted; zeros look like padding in the printed tensors.\n",
|
|
"    niezerowe_poczatki = [p for p in poczatki[indeks_wersu_pierwszego] if p>0.0]\n",
|
|
"    indeks_ostatniego_znaku = len(niezerowe_poczatki)-1\n",
|
|
"    poczatek_ostatniego_znaku = niezerowe_poczatki[indeks_ostatniego_znaku]\n",
|
|
"    koniec_ostatniego_znaku = konce[indeks_wersu_pierwszego][indeks_ostatniego_znaku]\n",
|
|
"    ton_ostatniego_znaku = tony[indeks_wersu_pierwszego][indeks_ostatniego_znaku]\n",
|
|
"    skladniki = [torch.from_numpy(kody[indeks_wersu_pierwszego]) for kody in (poczatki, konce, tony)]\n",
|
|
"    # Each scalar becomes a one-element tensor so it can be concatenated with the code arrays.\n",
|
|
"    skladniki += [\n",
|
|
"        torch.from_numpy(numpy.array([wartosc]))\n",
|
|
"        for wartosc in (len(niezerowe_poczatki), poczatek_ostatniego_znaku, koniec_ostatniego_znaku, ton_ostatniego_znaku)\n",
|
|
"    ]\n",
|
|
"    y_test.append(torch.cat(skladniki))\n",
|
|
"print(len(y_test))\n",
|
|
"print(y_test[0].shape)\n",
|
|
"print(y_test[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "6xAhS7JFvK3-"
|
|
},
|
|
"source": [
|
|
"## Wejście do sieci neuronowej."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 123,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "9tjiqjaMvK3_",
|
|
"outputId": "86050073-948d-4f3e-d0b3-bd4bf912f89e"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"7450\n",
|
|
"tensor([ 8, 22, 12, 7, 3, 9, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25,\n",
|
|
" 12, 21, 34, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3,\n",
|
|
" 4, 2, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 21, 1,\n",
|
|
" 4, 16, 22, 22, 22, 22, 13, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 24, 21, 1, 23, 25, 32, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,\n",
|
|
" 1, 3, 3, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n",
|
|
" 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 17,\n",
|
|
" 20, 1])\n",
|
|
"7450\n",
|
|
"1\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Pair every first verse with 5 distinct candidate second verses, always including its true partner.\n",
|
|
"# Y_test holds 1 for the true pair and 0 for a mismatched pair.\n",
|
|
"X_test = []\n",
|
|
"Y_test = []\n",
|
|
"for indeks_pary, wers_pierwszy in enumerate(x_test):\n",
|
|
"    indeksy = sample(range(len(y_test)), 5)\n",
|
|
"    # If the matching second verse was not drawn, it replaces the first drawn candidate.\n",
|
|
"    if indeks_pary not in indeksy:\n",
|
|
"        indeksy[0] = indeks_pary\n",
|
|
"    X_test.extend(torch.cat((wers_pierwszy, y_test[k])) for k in indeksy)\n",
|
|
"    Y_test.extend(int(k==indeks_pary) for k in indeksy)\n",
|
|
"\n",
|
|
"print(len(X_test))\n",
|
|
"print(X_test[0])\n",
|
|
"print(len(Y_test))\n",
|
|
"print(Y_test[0])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "vRLr2jePvK3_"
|
|
},
|
|
"source": [
|
|
"## Przewidywania sieci neuronowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 124,
|
|
"metadata": {
|
|
"id": "aV0NMXntvK3_"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Run the classifier on every candidate pair in X_test; the results are compared with Y_test below.\n",
|
|
"przewidywania_klasyfikatora = klasyfikator.predict(X_test)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 125,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "8xAk6jYsvK3_",
|
|
"outputId": "18748f71-ff77-44fc-f33d-1ebba630d4e7"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"-1.5850154355095964 1.513799312237276 0.20544654697588927 0.17125274897300372\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Regressor scores for the same candidate pairs, then their min, max, mean and median on one line.\n",
|
|
"przewidywania_regresora = regresor.predict(X_test)\n",
|
|
"print(*(statystyka(przewidywania_regresora) for statystyka in (numpy.min, numpy.max, numpy.mean, numpy.median)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "Kp830QiMvK4A"
|
|
},
|
|
"source": [
|
|
"### Dokładność na przygotowanych danych testowych."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 126,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "jYKOPOQcvK4A",
|
|
"outputId": "4daa9be0-8ed0-482c-c49e-27bb2f7fa5b9"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"0.8555704697986577\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"### MLPClassifier\n",
|
|
"\n",
|
|
"# Fraction of candidate pairs whose predicted label equals the reference label in Y_test.\n",
|
|
"mianownik = len(przewidywania_klasyfikatora)\n",
|
|
"licznik = sum(\n",
|
|
"  1 for indeks_pary in range(mianownik)\n",
|
|
"  if przewidywania_klasyfikatora[indeks_pary]==Y_test[indeks_pary]\n",
|
|
")\n",
|
|
"\n",
|
|
"print(licznik/mianownik*1.0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 127,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "BS6KtYRQvK4A",
|
|
"outputId": "0b50b72f-96d9-4482-d80e-8d68c57f329a"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"0.8377181208053691\n",
|
|
"0.8087248322147651\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"### MLPRegressor\n",
|
|
"\n",
|
|
"# Accuracy of the regressor used as a classifier at two decision thresholds: 0.5, then 0.9.\n",
|
|
"# A pair counts as correct when a true pair (label 1) scores above the threshold\n",
|
|
"# or a mismatched pair (label 0) scores below it; a score equal to the threshold never counts.\n",
|
|
"for prog in (0.5, 0.9):\n",
|
|
"    mianownik = len(przewidywania_regresora)\n",
|
|
"    licznik = 0\n",
|
|
"    for indeks_pary in range(mianownik):\n",
|
|
"        etykieta = Y_test[indeks_pary]\n",
|
|
"        ocena = przewidywania_regresora[indeks_pary]\n",
|
|
"        if (etykieta==1 and ocena>prog) or (etykieta==0 and ocena<prog):\n",
|
|
"            licznik+=1\n",
|
|
"\n",
|
|
"    print(licznik/mianownik*1.0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "jQ4yFJcAvK4A"
|
|
},
|
|
"source": [
|
|
"## Metryka oceniająca proponowanie przez model drugiego wersu.\n",
|
|
"### Jeżeli wśród propozycji nie ma spodziewanego poprawnego wersu, metryka przyjmuje minimalną wartość 0,0.\n",
|
|
"### Im mniej błędnych propozycji, tym wyższy wynik metryki.\n",
|
|
"### Jeżeli model proponuje tylko jeden wers i jest on poprawny, metryka przyjmuje maksymalną wartość 1,0."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 128,
|
|
"metadata": {
|
|
"id": "5yJbzXdnvK4B"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def jagosz_score(spodziewany_wers,proponowane_wersy):\n",
|
|
"    \"\"\"Score one set of proposed second verses against the expected verse.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"        spodziewany_wers: the expected (correct) second verse.\n",
|
|
"        proponowane_wersy: collection of verses proposed by the model.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"        float: 1 / number of proposals when the expected verse is among them, otherwise 0.0\n",
|
|
"        (also 0.0 when nothing was proposed).\n",
|
|
"    \"\"\"\n",
|
|
"    trafienie = int(spodziewany_wers in proponowane_wersy)\n",
|
|
"    # An empty proposal list is divided by 1 instead of 0, which yields 0.0.\n",
|
|
"    liczba_propozycji = max(len(proponowane_wersy), 1)\n",
|
|
"    return trafienie/liczba_propozycji*1.0"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "5W-lowtuvK4B"
|
|
},
|
|
"source": [
|
|
"### Wersja metryki dla całego zbioru."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 129,
|
|
"metadata": {
|
|
"id": "vm6Np4RxvK4B"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"def jagosz_score_dla_zbioru(krotki):\n",
|
|
"    \"\"\"Compute the proposal metric over a whole set of examples.\n",
|
|
"\n",
|
|
"    Args:\n",
|
|
"        krotki: iterable of items where item[0] is the expected verse and\n",
|
|
"            item[1] is the collection of verses proposed for it.\n",
|
|
"\n",
|
|
"    Returns:\n",
|
|
"        float: number of examples whose expected verse is among their proposals,\n",
|
|
"        divided by the total number of proposals over all examples;\n",
|
|
"        0.0 when no verse was proposed at all.\n",
|
|
"    \"\"\"\n",
|
|
"    trafienia = 0\n",
|
|
"    liczba_propozycji = 0\n",
|
|
"\n",
|
|
"    for k in krotki:\n",
|
|
"        spodziewany_wers, proponowane_wersy = k[0], k[1]\n",
|
|
"        if spodziewany_wers in proponowane_wersy:\n",
|
|
"            trafienia += 1\n",
|
|
"        liczba_propozycji += len(proponowane_wersy)\n",
|
|
"\n",
|
|
"    # Return a float on every path (this branch used to return the int 0), as jagosz_score does.\n",
|
|
"    if liczba_propozycji==0:\n",
|
|
"        return 0.0\n",
|
|
"    return trafienia/liczba_propozycji"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 130,
|
|
"metadata": {
|
|
"id": "mFhtdrE6vK4B"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Pick 10 indices into x_test with sample().\n",
|
|
"# NOTE(review): no seed is set in this cell, so the selection presumably differs between runs - confirm.\n",
|
|
"wybrane_dane_testowe = sample(range(len(x_test)),10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "OQ-1vBedvK4C"
|
|
},
|
|
"source": [
|
|
"## MLPClassifier\n",
|
|
"### Proponuje wszystkie wersy, dla których ocena modelu to 1."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 131,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "_QX8BxFOvK4C",
|
|
"outputId": "5b216c48-ecf2-414a-b0e4-ce520691aba7"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"wers pierwszy:\t\t 仁义自修君子安我\n",
|
|
"poprawny wers drugi:\t 诗礼之教家人利贞\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['丰碑万古纪殊勋', '情牵大地春满人间', '珠圆玉润入口皆甜', '德行梦笔开盛世新篇', '柔水月光披野地天穹', '闲耽笔墨自从容', '一湾碧水日如金', '浮沉历尽许由谁', '人情世故亦须明', '三番顾茅庐皇叔牵龙', '春光入户户新幸福多', '花明柳媚湖上长春', '调一湖春色染绿江淮', '发者斗芳梅葩早帅焉', '一联争首榜元眼花胪', '深恩彻骨万代常萦', '堤前柳浪露春光', '湖山叠韵入我诗囊', '辞雄子建赋拟相如', '行廉拒腐执法如山', '国运弥盛史弥远', '瑞通阆苑琼楼兴百轩', '苗兴汉夏望族振乾坤', '砖雕雕壁画砖马腾空', '歌酣万户九域报长春', '千幅对联红透时光', '赋浓夏盛寓秋实', '东床配西席不是东西', '飞鸿远浦一时惊', '就门外汉示不二门', '天心阁阁内鸽鸽飞阁不飞', '捉刀李白斩斯文', '桨声翻学海海载苦舟', '月光如户窥佳人', '小康致富富人间', '直播日照时时精彩', '共赏芦溪水高下相倾', '喜传桃谷峪马叫人欢', '满腔忧愤铸诗魂', '荷描夏画日钤章', '小桃几树鸟啼红', '五光十色文字之华', '柳垂水面翠溶南北风', '伟雄心志白鹤相知', '小金龙瑞雪兆丰年', '松风竹韵多抒情', '当辨忠奸岂可负全民', '心中无欲不争春', '樽彝错杂古道犹存', '诗礼之教家人利贞', '出门去白面书生', '中古生华易古往冬', '攀龙附凤欲攀彩凤缘', '地连南北日星恒久晖', '吸烟无益肺摧残', '蔼峰亦寄诗仙情', '杯中寂寞不曾空', '油田崛起为生产护航', '综一代典成一家言', '辩雕春囿德莹秋天', '碧峰犹冷寺前春', '微言明义苦谏纠偏', '尘凡皆妄昧贪嗔痴愚', '草逢蓬室至家中', '平野百里高山九重', '诗篇避俗不酬人', '官吏非全力吏全力官', '利人始是大修行', '鸣钟食鼎甘田土之出', '手携一集质于通人', '方塘九夏溢荷香', '扬风遗泽仁厚人家', '风中落叶泣无声', '雅情雅韵仙客有约', '开枰先弃是非心', '陈天保颂代地道终', '飞腾雅典腾飞环球', '天明独倚楼坐到黄昏', '塞上无诗诗圣上乘诗', '不甘卖命换虚名', '花样年华联若洒可钦', '寄人篱下始知求', '误将弟子入迷宫', '悲秋远去一孤鸿', '盛世兴盛事鹏举云天']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
|
|
"liczba proponowanych wersów: 85\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.011764705882352941\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 雪落千山静\n",
|
|
"poprawny wers drugi:\t 冰封万水寒\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['人懒几生尘', '清心长保真', '花香不在多', '草木已含英', '松摇古谷风', '鹤踪上潭冰', '兰馨溢神州', '四海奋人心', '贤媳举扇陪', '莫愁女儿红', '春归柳色红', '此味几人同', '寻源路不迷', '头彩出中原', '绿野寄仙踪', '玉律始调阳', '梦舟载月明', '难教白日闲', '慢煲绿豆汤', '智者忍违缘', '真风再发扬', '梅韵贺新年', '一街太平歌', '桃花自美容', '诗带好风吟', '行藏固有期', '大功扫叛臣', '赤水得玄珠', '诗兴不无神', '月分老梅香', '禹甸沐春风', '徒临洗药泉', '一樽欢暂同', '案头月一樽', '英雄是达人', '木栽门内闲', '移山志不忘', '碧柳锁长亭', '风定水无波', '醉后赋离骚', '胡蝶飞南园', '长河没晓天', '三江福寿图', '脉脉万重心', '高处看浮云', '两乡明月心', '高悬不畏风', '牖含遍岭春', '少年是网虫', '豆灯照墨新', '水凉难泡茶', '中华共颂贤臣', '户内美色呈辉', '府藏石铫图', '宛在岱中行', '王府池子深', '夕观沧海云', '酒醉好题诗', '梅迎跃进春', '箫声向远天', '莫向外头看', '思量枕席功夫', '松风如在弦', '行吟必向民心', '这边环境安宁', '羊年事事吉祥', '年年有盼头', '碧浪皱红霞', '山深虎迹踪', '衣兴露脐', '伴梦眠老屋', '民以食为天', '初日临春虚', '惩凶儆效尤', '梅花落我肩', '搴舟破晓风', '衣间不带尘', '人我法皆空', '何防凿壁偷', '艺高大胆人', '花荣上海人', '天地月常圆', '敞襟天地宽', '起宏图', '开光佛自由', '时泰喜黎民', '月轮碾古今', '而今当宝存', '心静自然凉', '山转路无穷', '白日奈我何', '春心蝶最知', '千花夹寺门', '无肉也能行', '夜寂鸟啼空', '江涌古今潮', '尝鲜食鱼羊', '烽火起云间', '塞外朔风寒', '巧拙尚相悬', '中庭松桂姿', '品德讲道德', '秋波我梦吟', '香飘十里风', '莺歌鹧鸪天']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 105\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 莫漫三槐羡王自\n",
|
|
"poprawny wers drugi:\t 须先百忍学张公\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['尘烟已远隐青山', '人如无欲意何求', '他乡月好俺思亲', '丰碑万古纪殊勋', '珠圆玉润入口皆甜', '毅力凝成跨海桥', '海深寻秘展雄才', '青山醉向一樽横', '丰年留客足鸡豚', '美德重红幸福门', '万般幻态杳随风', '须先百忍学张公', '柔水月光披野地天穹', '珠帘难掩月多情', '铮铮梅蕾半含春', '莺燕对舞艳阳天', '青山四面纳千流', '闲庭信步哼欢歌', '千般柳絮游子心', '殷殷老叶护花红', '偶观雨燕栖寒檐', '居身常抱玉壶清', '一腔热血死难消', '而今初信及笄年', '红灯素户好风光', '江河自古向东流', '清风两袖带回家', '一湾碧水日如金', '三阳开泰颂廉明', '锦葵昂面为迎光', '看三国志欲何为', '江山犹得助诗豪', '人情世故亦须明', '早将秋韵入诗怀', '千杯浊酒醉恒长', '庖丁自有解牛刀', '扬州十里小红庐', '云本无心醉雨嫣', '心牵雨骤夜归人', '规矩者应晓方圆', '青灯久作故人看', '猴腾广宇绽琼花', '茫然回首奈何桥', '三联书韵醉今生', '风临荷盏窃清香', '豪杰意气傲云天', '璋玉无瑕耀祥光', '花好月圆夜长明', '心宽纳海老夫能', '闲愁起处是红尘', '扬鞭跃马马行空', '然乎者一字乾坤', '且求秋实果一园', '我犹未脱长康痴', '鹰翔蓝宇戏搏云', '甘棠播爱岁流金', '胸中消尽是非心', '篆铭山石荡浮华', '英年奋进惜时光', '春江柳线乱弹琴', '文联叶问斩妖魔', '梅香葱岭缀长虹', '仙人指路点迷津', '万般气象壮龙年', '催开玉蕊艳无边', '清塘浴月鹤逐风', '时急方须济世才', '国展宏图烈士欣', '卷帘烧烛看梅花', '圆缺朗月也浮名', '梧桐叶上得秋声', '五色龙溪抱江流', '仰头天幕挂霓虹', '新朋正续进行时', '一指清凉尽染秋', '小小儿郎立路中', '无田有业不为贫', '一溪柳绿到谁家', '俨然天竺古先生', '西湖乡梦约谁寻', '听竹尤增几许清', '武夷阳羡品俱馨', '堪求五体保安康', '一川杨柳笼和风', '韧节有意杜虚名', '两个老头打秋风', '直抒快意墨千秋', '名享三奇显祖公', '常将劲节负秋霜', '国持德政著宏篇', '阳朔沿水显花荣', '善男信女拜观音', '举杯邀月到凡尘', '兰心未老梦如初', '伤心羁旅断愁肠', '制笙立乐业兴邦', '亭藏绿荫万般幽', '英雄力困也求人', '神驹腾跃吉祥年', '夜灯勤礼塔中仙', '飞鸿远浦一时惊', '相思一点老了谁', '花间酌酒赏蝶飞', '风雨人生鉴知音', '清泉有趣自通融', '山长水远恨重重', '游春岂料梦成真', '年丰人寿沁诗声', '浮舟水面尽飞花', '三令五申还有贪', '霜飞两鬓孔明灯', '重回津渡觅缠绵', '月光如户窥佳人', '得心应手手头宽', '子孙常读未烧书', '小康致富富人间', '情凝大地重如山', '元兴世盛展宏图', '优良业绩绩可观', '长将远景引天边', '相如廉颇璧千钧', '满腔忧愤铸诗魂', '万般殷切候佳音', '身前淡泊莫非尘', '行吟战马啸征尘', '关羽无停觅长兄', '春风惠我也惠人', '小可参禅入几分', '常教翰墨作鼓吹', '荷描夏画日钤章', '澄天月隐星今宵', '空海星辰宇宙流', '小桃几树鸟啼红', '皇城玉阙夕阳斜', '花贴幼子悦童心', '四行热泪洒苍颜', '双琴欣鼓杏花天', '人间重义树新风', '轻舟破浪过千山', '修身松竹有高风', '马舞龙韵续华章', '山影盘龙月钓珠', '风吹杨柳翠还柔', '一丛老竹梦于胸', '时临峻岭采浮云', '关外又开一朵奇葩', '幽梦一帘总是春', '崇廉尚德岛尚书', '钻杆穿地唱欢歌', '专门收拾搞重婚', '松风竹韵多抒情', '再将粉黛沁于宣', '秉公执法树廉风', '当惊阁老好风光', '心中无欲不争春', '尚德定可净人心', '吹牛煮海火收兵', '凝才情血汗磨刀', '梅花傲雪迓长春', '出门去白面书生', '江郎梦里得犹神', '金声玉振展奇才', '黄金灿灿冷如冰', '春深似海梦无痕', '停琴问月正归乡', '文中已现老成心', '迎来信誉达三江', '迎春老树发新芽', '云压水岸浪逐云', '他日凌云傲世间', '无休往日浩如烟', '相思不减病扶墙', '杜曲幸有桑麻田', '挖坑华夏葬儒顽', '金龙对舞戏中来', '水城画卷展宏图', '江心美景湛空明', '深谋远虑有心人', 
'风吹枫落枫随风', '蔼峰亦寄诗仙情', '江山忧患老英雄', '竹下新笋一色鲜', '月窥秋水潜伏人', '黄粱入梦悟尘心', '沉年古木韵临风', '度日如年席卷八荒', '是非自古要三思', '山窗月透一痕青', '联内音声欠古风', '风流人物看今朝', '横窗疏影绽梅花', '千篇一律竞同声', '人心锁锁锁还开', '钢骨框架筑高楼', '闲聊岁月万年篇', '义常若水润人心', '狂歌万里御风行', '半坡翠竹耸蓝天', '须从肝胆识英豪', '无须雪尽水开光', '酣摊夏苑恋风情', '人生一笑尽良朋', '流觞逸兴写兰亭', '心情更比落花差', '沉吟雁字续前缘', '满园桃李尽争春', '星沉水底任鱼吞', '饮酒月前独自愁', '千竿节气叠浪花', '莺恋柳色月常依', '人威毕竟胜天威', '空樽对月直哆嗦', '微言明义苦谏纠偏', '年尾年头送温馨', '禅房墨案写梅香', '还期雅韵再相闻', '始知赢女善吹箫', '岁变难更意里人', '深感人情冷似冰', '千枝红杏闹春光', '仙风道骨验方肠', '草逢蓬室至家中', '廉政为民常山情', '三园猗顿晋商宗', '炎黄子孙志超群', '落花空结水姻缘', '归来燕子柳初新', '霞飞洞外洞飞霞', '利人始是大修行', '蛇头就是做中人', '一壶沧海洗微名', '丛林花草总痴心', '清风临案窃书魂', '长江十堰显神功', '汉宫春风暮烟中', '银河阻断鹊双飞', '花到极清始觉香', '红梅傲雪笑枝青', '阿咸才俊翠壶冰', '有名海粟雪难埋', '浴凫飞鸳晚悠悠', '天长日丽艳桃花', '收来花信燕声中', '文章有道拟施行', '闻名色变探花郎', '弥陀含笑放光辉', '几杯醉后月光高', '闲生百态网中人', '泠泠若水慧兰馨', '莫愁燕去有回时', '方塘九夏溢荷香', '心贪名利岂能廉', '开卷细同贤者谋', '尘梦长随月色清', '转身应把泪珠弹', '安心是药更无方', '千秋剑气护忠魂', '风中落叶泣无声', '三生有幸遇知音', '峰丘暗许百年情', '一窗竹影又经风', '必须意识玩绵拳', '开枰先弃是非心', '本祖巍峨八剑堂', '家兴国兴万事兴', '花间蕊貌润如珠', '不甘卖命换虚名', '临碣颂古诗风悠', '风凉彼岸柳垂帘', '百千万亿归于零', '洁烹芬美进嘉宾', '水上石头泵可行', '寄人篱下始知求', '两行悠然静临风', '笑迎世纪浴春光', '悲秋远去一孤鸿', '夏种稻田要维家', '花好焉无惬意时', '两行白鹭上青天', '无缘去日怎追风', '笑谈成败慎出兵', '岗连高鼎天为峰', '春来江水绿如蓝', '新朋草舍对趣联', '绿如李叶亮如霜', '联文对句限孤平']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
|
|
"liczba proponowanych wersów: 286\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0034965034965034965\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 到此酒垆应更好\n",
|
|
"poprawny wers drugi:\t 问渠嘉树是谁栽\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['尘烟已远隐青山', '而今华夏振雄风', '人如无欲意何求', '丰碑万古纪殊勋', '问渠嘉树是谁栽', '珠圆玉润入口皆甜', '毅力凝成跨海桥', '三杯白酒乐成仙', '青山醉向一樽横', '丰年留客足鸡豚', '美德重红幸福门', '珠帘难掩月多情', '莺燕对舞艳阳天', '青山四面纳千流', '雅文相识好文人', '千般柳絮游子心', '殷殷老叶护花红', '满头霜雪和新梅', '一腔热血死难消', '梦离幻境是非多', '江河自古向东流', '一湾碧水日如金', '小楼吹砌玉生寒', '锦葵昂面为迎光', '看三国志欲何为', '闹中取静看擂台', '庖丁自有解牛刀', '扬州十里小红庐', '心牵雨骤夜归人', '青山一座共云闲', '猴腾广宇绽琼花', '茫然回首奈何桥', '花狎春云露搅和', '青山绿水皆低头', '三联书韵醉今生', '更移阳朔七堆山', '豪杰意气傲云天', '璋玉无瑕耀祥光', '花好月圆夜长明', '心宽纳海老夫能', '闲愁起处是红尘', '鸿才立世展鸿图', '然乎者一字乾坤', '青牛出谷李成熟', '鹰翔蓝宇戏搏云', '甘棠播爱岁流金', '篆铭山石荡浮华', '一拍之下就轻生', '弄潮帆影港城新', '莲花亲水意崇廉', '英年奋进惜时光', '放歌音厚是功深', '故人书自日边来', '又何必三日闻香', '文联叶问斩妖魔', '万般气象壮龙年', '催开玉蕊艳无边', '清塘浴月鹤逐风', '圆缺朗月也浮名', '暖香十里软莺声', '闲敲棋子落灯花', '新朋正续进行时', '一指清凉尽染秋', '日移松影过禅床', '无田有业不为贫', '俨然天竺古先生', '西湖乡梦约谁寻', '纵情狂乱毁根基', '韧节有意杜虚名', '两个老头打秋风', '追求亮丽美人图', '腐败必被人民纠', '鼓瑟难得悦美人', '国持德政著宏篇', '阳朔沿水显花荣', '千般爱意眼中留', '好将长铗护黎民', '伤心羁旅断愁肠', '英雄力困也求人', '且由明月洗尘心', '金花覆没白丢盘', '神驹腾跃吉祥年', '翠柳清风伴杏娇', '万家台笠雨声甘', '蛇听燕语颂春光', '相思一点老了谁', '花间酌酒赏蝶飞', '磋砣无奈怨摽梅', '清泉有趣自通融', '山长水远恨重重', '情如水淡话沧桑', '捉刀李白斩斯文', '重回津渡觅缠绵', '不言第一海胸襟', '乌啼古树惹乡愁', '月光如户窥佳人', '雪漫大地秋光失', '新梅雪橇雅幽行', '官居宰相望王侯', '元兴世盛展宏图', '一意孤行不回头', '优良业绩绩可观', '相如廉颇璧千钧', '江水源源发电来', '行吟战马啸征尘', '关羽无停觅长兄', '春风惠我也惠人', '小可参禅入几分', '常教翰墨作鼓吹', '澄天月隐星今宵', '空海星辰宇宙流', '小桃几树鸟啼红', '红旗漫卷息狼烟', '兄弟同吟夜雨陪', '双琴欣鼓杏花天', '人间重义树新风', '轻舟破浪过千山', '汉高祖业耀千秋', '横波一顾白云旁', '修身松竹有高风', '晓霞含愁看早梅', '风吹杨柳翠还柔', '幽梦一帘总是春', '崇廉尚德岛尚书', '钻杆穿地唱欢歌', '专门收拾搞重婚', '松风竹韵多抒情', '当惊阁老好风光', '黄叶飘零比较烦', '尚德定可净人心', '吹牛煮海火收兵', '凝才情血汗磨刀', '梅花傲雪迓长春', '出门去白面书生', '江郎梦里得犹神', '停琴问月正归乡', '文中已现老成心', '何堪心乱雨难读', '迎来信誉达三江', '迎春老树发新芽', '云压水岸浪逐云', '还将歌赋寄相思', '吕布吕蒙常用兵', '相思不减病扶墙', '杜曲幸有桑麻田', '金龙对舞戏中来', '三春经纬织民图', '回家时不见秋鸿', '满园丹桂早飘香', '风吹枫落枫随风', '蔼峰亦寄诗仙情', '江山忧患老英雄', '火牛曾胜敌千军', '沉年古木韵临风', '落蕊黯留一挽香', '度日如年席卷八荒', '是非自古要三思', '寻思流水意如何', '风流人物看今朝', '横窗疏影绽梅花', '千篇一律竞同声', '人心锁锁锁还开', '钢骨框架筑高楼', '闲聊岁月万年篇', '昨天白骨续缘乎', '狂歌万里御风行', '半坡翠竹耸蓝天', '无须雪尽水开光', '酣摊夏苑恋风情', '流觞逸兴写兰亭', '辩雕春囿德莹秋天', '心若非良必惹悲', '沉吟雁字续前缘', '灯残襟冷感情无', '满园桃李尽争春', '杏雨又同桃雨飞', '竹叶入唇醉耋龄', '星沉水底任鱼吞', '千竿节气叠浪花', 
'莺恋柳色月常依', '人威毕竟胜天威', '空樽对月直哆嗦', '年尾年头送温馨', '禅房墨案写梅香', '还期雅韵再相闻', '岁变难更意里人', '常无寂寞自吟诗', '千枝红杏闹春光', '三园猗顿晋商宗', '当知沦落也从容', '炎黄子孙志超群', '落花空结水姻缘', '归来燕子柳初新', '霞飞洞外洞飞霞', '利人始是大修行', '蛇头就是做中人', '睡梦中难辨东西', '一壶沧海洗微名', '丛林花草总痴心', '浓妆淡抹总相宜', '银河阻断鹊双飞', '东风无处不扬花', '离合不损月分明', '红梅傲雪笑枝青', '阿咸才俊翠壶冰', '春逢喜气盛迎门', '有名海粟雪难埋', '浴凫飞鸳晚悠悠', '天长日丽艳桃花', '是大英雄自虚怀', '杏风桃韵语花辞', '收来花信燕声中', '四朝金柱识高才', '万里清风驻洁江', '泠泠若水慧兰馨', '莫愁燕去有回时', '单恋独予一江秋', '积德途中永不停', '千秋剑气护忠魂', '五申三令不成规', '名多友多著作多', '峰丘暗许百年情', '必须意识玩绵拳', '归家且遂十年心', '弄箫谱曲假樵歌', '风急烟轻水送舟', '本祖巍峨八剑堂', '临碣颂古诗风悠', '烟波浩渺任龙舒', '昙花怎晓夜幽长', '风凉彼岸柳垂帘', '百千万亿归于零', '芳华满室映丹青', '门盈喜气喜盈门', '寄人篱下始知求', '名城泉水润京腔', '误将弟子入迷宫', '自然觉悟不由他', '笑迎世纪浴春光', '河朔膏腴古督亢', '悲秋远去一孤鸿', '两行白鹭上青天', '岗连高鼎天为峰', '春来江水绿如蓝', '年青有志遂良图', '新朋草舍对趣联', '再邀瘦月饮三巡', '慎独湖湘第一人', '松麓邀云放月筝', '绿如李叶亮如霜']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
|
|
"liczba proponowanych wersów: 250\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.004\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 漫向楼台寻婉转\n",
|
|
"poprawny wers drugi:\t 重回津渡觅缠绵\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['尘烟已远隐青山', '人如无欲意何求', '丰碑万古纪殊勋', '德操应效柏和松', '问渠嘉树是谁栽', '珠圆玉润入口皆甜', '海深寻秘展雄才', '丰年留客足鸡豚', '美德重红幸福门', '珠帘难掩月多情', '铮铮梅蕾半含春', '莺燕对舞艳阳天', '雅文相识好文人', '一心二用两头空', '闲庭信步哼欢歌', '千般柳絮游子心', '映日桑榆重晚晴', '殷殷老叶护花红', '偶观雨燕栖寒檐', '满头霜雪和新梅', '一腔热血死难消', '梦离幻境是非多', '而今初信及笄年', '红灯素户好风光', '江河自古向东流', '清风两袖带回家', '酒醉不如伴月眠', '锦葵昂面为迎光', '浮沉历尽许由谁', '看三国志欲何为', '江山犹得助诗豪', '人情世故亦须明', '闹中取静看擂台', '早将秋韵入诗怀', '庖丁自有解牛刀', '扬州十里小红庐', '心牵雨骤夜归人', '与君同作太平人', '青山一座共云闲', '猴腾广宇绽琼花', '言少言多尽美谈', '好留明月九千秋', '茫然回首奈何桥', '青山绿水皆低头', '三联书韵醉今生', '更移阳朔七堆山', '风临荷盏窃清香', '豪杰意气傲云天', '璋玉无瑕耀祥光', '花好月圆夜长明', '心宽纳海老夫能', '闲愁起处是红尘', '鸿才立世展鸿图', '扬鞭跃马马行空', '然乎者一字乾坤', '且求秋实果一园', '南燕离巢北国春', '青牛出谷李成熟', '我犹未脱长康痴', '鹰翔蓝宇戏搏云', '甘棠播爱岁流金', '胸中消尽是非心', '篆铭山石荡浮华', '一拍之下就轻生', '凌云揽月步高科', '弄潮帆影港城新', '莲花亲水意崇廉', '英年奋进惜时光', '放歌音厚是功深', '春江柳线乱弹琴', '故人书自日边来', '文联叶问斩妖魔', '梅香葱岭缀长虹', '学问无穷博古今', '仙人指路点迷津', '催开玉蕊艳无边', '国运弥盛史弥远', '清塘浴月鹤逐风', '卷帘烧烛看梅花', '圆缺朗月也浮名', '大河滚滚尽淘沙', '梧桐叶上得秋声', '五色龙溪抱江流', '闲敲棋子落灯花', '仰头天幕挂霓虹', '新朋正续进行时', '俨然天竺古先生', '武夷阳羡品俱馨', '拨弦弹水月偏题', '纵情狂乱毁根基', '笛声浅扣暗推窗', '一川杨柳笼和风', '韧节有意杜虚名', '两个老头打秋风', '洗出芙蓉九点青', '半空摇晃寻常仁', '追求亮丽美人图', '腐败必被人民纠', '名享三奇显祖公', '鼓瑟难得悦美人', '明月来时渚落霜', '常将劲节负秋霜', '国持德政著宏篇', '阳朔沿水显花荣', '善男信女拜观音', '千般爱意眼中留', '好将长铗护黎民', '兰心未老梦如初', '世间最难得弟兄', '亭藏绿荫万般幽', '英雄力困也求人', '且由明月洗尘心', '金花覆没白丢盘', '薄酒三杯吊芳魂', '普荫全球亿万生', '神驹腾跃吉祥年', '孔圣有才死后尊', '飞鸿远浦一时惊', '往事依然笔底新', '四方称霸一魔方', '相思一点老了谁', '花间酌酒赏蝶飞', '磋砣无奈怨摽梅', '风雨人生鉴知音', '清泉有趣自通融', '山长水远恨重重', '游春岂料梦成真', '年丰人寿沁诗声', '浮舟水面尽飞花', '情如水淡话沧桑', '霜飞两鬓孔明灯', '重回津渡觅缠绵', '不言第一海胸襟', '月光如户窥佳人', '半帘秋梦鸟也酥', '对苑繁华万蕾新', '得心应手手头宽', '子孙常读未烧书', '雪漫大地秋光失', '情凝大地重如山', '常想旁通不对头', '一意孤行不回头', '优良业绩绩可观', '路远始于跬步间', '长将远景引天边', '相如廉颇璧千钧', '满腔忧愤铸诗魂', '万般殷切候佳音', '身前淡泊莫非尘', '江水源源发电来', '行吟战马啸征尘', '关羽无停觅长兄', '春风惠我也惠人', '扁舟轻荡水云长', '常教翰墨作鼓吹', '澄天月隐星今宵', '空海星辰宇宙流', '小桃几树鸟啼红', '花贴幼子悦童心', '红旗漫卷息狼烟', '思量枕席功夫', '双琴欣鼓杏花天', '人间重义树新风', '汉高祖业耀千秋', '横波一顾白云旁', '修身松竹有高风', '山影盘龙月钓珠', '风吹杨柳翠还柔', '一丛老竹梦于胸', '时临峻岭采浮云', '幽梦一帘总是春', '崇廉尚德岛尚书', '钻杆穿地唱欢歌', '下周农历已秋分', '专门收拾搞重婚', '松风竹韵多抒情', '秉公执法树廉风', '当惊阁老好风光', '心中无欲不争春', 
'黄叶飘零比较烦', '尚德定可净人心', '吹牛煮海火收兵', '凝才情血汗磨刀', '梅花傲雪迓长春', '骨头坚硬好八连', '出门去白面书生', '江郎梦里得犹神', '畅谈国事一腔情', '金声玉振展奇才', '黄金灿灿冷如冰', '春深似海梦无痕', '停琴问月正归乡', '文中已现老成心', '何堪心乱雨难读', '迎来信誉达三江', '迎春老树发新芽', '田园播种喜开篇', '月转疏枝过女墙', '云压水岸浪逐云', '还将歌赋寄相思', '辉煌岁月请珍惜', '无休往日浩如烟', '相思不减病扶墙', '杜曲幸有桑麻田', '挖坑华夏葬儒顽', '三春经纬织民图', '润雨催开草甸花', '满园丹桂早飘香', '深谋远虑有心人', '吸烟无益肺摧残', '风吹枫落枫随风', '收篇难阻浪涛狂', '月窥秋水潜伏人', '沉年古木韵临风', '度日如年席卷八荒', '是非自古要三思', '寻思流水意如何', '风流人物看今朝', '横窗疏影绽梅花', '千篇一律竞同声', '人心锁锁锁还开', '钢骨框架筑高楼', '闲聊岁月万年篇', '义常若水润人心', '狂歌万里御风行', '半坡翠竹耸蓝天', '帆连水色接天涯', '无须雪尽水开光', '综一代典成一家言', '酣摊夏苑恋风情', '人生一笑尽良朋', '流觞逸兴写兰亭', '心情更比落花差', '死后欣然上八仙', '沉吟雁字续前缘', '灯残襟冷感情无', '满园桃李尽争春', '碧峰犹冷寺前春', '竹叶入唇醉耋龄', '星沉水底任鱼吞', '饮酒月前独自愁', '千竿节气叠浪花', '莺恋柳色月常依', '人威毕竟胜天威', '空樽对月直哆嗦', '微言明义苦谏纠偏', '年尾年头送温馨', '禅房墨案写梅香', '始知赢女善吹箫', '岁变难更意里人', '烛影摇红夜不眠', '千枝红杏闹春光', '廉政为民常山情', '三园猗顿晋商宗', '炎黄子孙志超群', '落花空结水姻缘', '归来燕子柳初新', '梦远还托风导游', '利人始是大修行', '隔村香送稻花肥', '丛林花草总痴心', '浓妆淡抹总相宜', '滇池画舫储蛮声', '清风临案窃书魂', '汉宫春风暮烟中', '东风无处不扬花', '红梅傲雪笑枝青', '阿咸才俊翠壶冰', '丹楹喜庆福临门', '有名海粟雪难埋', '天长日丽艳桃花', '杏风桃韵语花辞', '文章有道拟施行', '四朝金柱识高才', '收聚白露仙人梦', '羊跃人欢艳阳春', '弥陀含笑放光辉', '几杯醉后月光高', '泠泠若水慧兰馨', '修身多读养心书', '莫愁燕去有回时', '方塘九夏溢荷香', '转身应把泪珠弹', '单恋独予一江秋', '积德途中永不停', '安心是药更无方', '名多友多著作多', '风中落叶泣无声', '三生有幸遇知音', '峰丘暗许百年情', '必须意识玩绵拳', '开枰先弃是非心', '归家且遂十年心', '翠林深处玄珠湖', '弄箫谱曲假樵歌', '本祖巍峨八剑堂', '家兴国兴万事兴', '临碣颂古诗风悠', '昙花怎晓夜幽长', '风凉彼岸柳垂帘', '百千万亿归于零', '芳华满室映丹青', '门盈喜气喜盈门', '水上石头泵可行', '寄人篱下始知求', '名城泉水润京腔', '两行悠然静临风', '误将弟子入迷宫', '河朔膏腴古督亢', '悲秋远去一孤鸿', '夏种稻田要维家', '两行白鹭上青天', '笑谈成败慎出兵', '岗连高鼎天为峰', '春来江水绿如蓝', '年青有志遂良图', '新朋草舍对趣联', '绿如李叶亮如霜', '联文对句限孤平']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
|
|
"liczba proponowanych wersów: 319\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.003134796238244514\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 晋水育菩提叶叠千层呈瑞气\n",
|
|
"poprawny wers drugi:\t 玉兰生妙境花开十里献诗情\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['慈心抒自在手慈眼慈甘露慈', '冬雪心明落叶入泥还育春', '情歌依旧仍随秋水染夕阳', '陈家颜割落耳朵颜面才是东家', '千师作赋笔下新村韵有余', '织天织地织出人间一个家', '一心一意书中可造万般星才', '冰天雪地寒鱼破镜钓江翁', '黄山挥笔新春祝酒绘宏图', '穷途哭恸阮籍猖狂独咏怀', '杏林栽福地仁心妙术起沉疴', '十年非忘本学子该当底气足', '台阶通化境佛寺巍巍气韵深', '一僧击暮鼓南无长诵保平安', '有彩有华偶得佳联少雅人', '英灵不昧览兹蹇蹇匪躬愚', '王粲传遣词备悉预须认定大纲', '八方铺锦绣紫燕娇啼羡物华', '春溪赴梦入径带来山外情', '一匕图始皇自有我易水悲歌', '挥洒一身才气令岁月流芳', '寻梅雪岭无畏寒侵自有香', '待儿曹婚娶莼羹鲈脍慷慨知还', '瞧瞧无品文人赊去空格拍马填', '窗移晌午红蕖深处妹撑船', '琼花瑶叶雨浥芙蕖冉冉香', '皖吟风徽歌韵老村美景若诗', '离别时章柳折残山花静待来春', '中华娇子红塔山云烟贵烟', '字成一体大戟长枪跌宕书', '说地谈天妙语千词趣味生', '人言虽可信但防渭水混泾江', '年年七夕望月观星念恋人', '强国兴邦关注三农百业展新猷', '勤习十载几案当知学子心', '兴亡在抱百千年史鉴咏冰心', '梅花千万点报得人间锦绣春', '鲲鹏翔瀚宇激越高昂自在身', '楚山飞楚曲八方唱就楚风淳', '旌旗飞舞千桡激起粤精神', '玉轮升碧海清辉广照出天然', '壮怀逸兴盛世鸿儒聚鹭园', '终日惟杜门蔬食经卷绳床', '只身游燕赵淡泊无定水云舒', '啜甘须忆苦纵登高位犹纳清风', '登高极目从兹俗虑自消沉', '千家纳福转型跨越晋城兴', '嫩竹舒新绿倚遍春风翠袖寒', '心描山水情一枝一叶总关情', '心朝北斗祖国万岁路铺金', '新年缔良缘月圆人寿谱新歌', '平台屹屹出水蛟腾碧浪中', '四十年苦戍曾教瀚海变桑田', '风亦软云亦淡独怜一地月华', '万般思绪华章雅集自陶然', '为环球献瑞沧桑洗礼万年冰', '指告后昆代代永铭国耻激扬', '更漏子蝶恋花千滴满见泪沙流', '党承柱石九州四海举红旗', '剩有渊明趣随宜对秋色持醪', '浓妆淡抹秋暮霜枫写意诗', '春光照大地九州共绘小康图', '心游翰海叹这般风月似醉似痴', '张皇祖道哀丝豪竹别离间', '千秋华夏千秋业更需龙裔担当', '玉兰生妙境花开十里献诗情', '风和牵细浪托盘荷畔捧玉珠', '对对总求工自对需如互对工', '淡烟浮动摇魂湖月对姮娥', '汇九霄圣脉犀江溢彩梦园芳', '太白泼墨天上月云石上诗', '俗子凡胎从来市井最人情', '山留菩境石鼓一悬梦万年', '文辉百载一轮旭日照庭兰', '裁诗月下诗成月下月尤明', '梅影横窗瘦南枝微弄雪精神', '福音云外播心泉涌玉接灵源', '做戏人看戏人戏内戏人看人', '涵秀沐风雨春风化雨润人心', '何须斗气眼下齐心破一曹', '壮大联坛一片云霞灿锦城', '胸怀税务戮力耕耘收税献丹心', '秉公办事牢记四知品自高', '龙狮舞彩八方乐奏颂长春', '锤镰记取红色党旗血染成', '秦有十八子笔墨抒意论春秋', '德宏章贡修文悦礼敦古铄今', '蔺廉有隙终对刎颈死生交', '雪绽一树花漫园寒梅点点香', '动车牵北南绿城煤城双轨接龙', '熄八年烽火侵华历史鉴千秋', '荷叶一池满铺开澄碧坦荡人心', '辉煌禹甸水漾芙蕖万象新', '盼美丽中国收入倍增成就小康']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
|
|
"liczba proponowanych wersów: 94\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.010638297872340425\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 国运逢春好\n",
|
|
"poprawny wers drugi:\t 民心向党红\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['人懒几生尘', '清心长保真', '花香不在多', '草木已含英', '松摇古谷风', '鹤踪上潭冰', '兰馨溢神州', '四海奋人心', '珍簟展方床', '贤媳举扇陪', '冰封万水寒', '莫愁女儿红', '春归柳色红', '此味几人同', '寻源路不迷', '头彩出中原', '玉律始调阳', '梦舟载月明', '难教白日闲', '慢煲绿豆汤', '倾城倾国', '智者忍违缘', '真风再发扬', '梅韵贺新年', '一街太平歌', '桃花自美容', '诗带好风吟', '行藏固有期', '大功扫叛臣', '赤水得玄珠', '诗兴不无神', '月分老梅香', '禹甸沐春风', '徒临洗药泉', '一樽欢暂同', '雪厚松袅云', '案头月一樽', '英雄是达人', '木栽门内闲', '庙略久论兵', '重担重担人', '碧柳锁长亭', '风定水无波', '胡蝶飞南园', '世态笑炎凉', '长河没晓天', '风笔绘春秋', '三江福寿图', '脉脉万重心', '高处看浮云', '两乡明月心', '高悬不畏风', '牖含遍岭春', '少年是网虫', '豆灯照墨新', '水凉难泡茶', '中华共颂贤臣', '户内美色呈辉', '府藏石铫图', '池浅韵牵波', '宛在岱中行', '王府池子深', '寺与山争鲜', '夕观沧海云', '眉月静横窗', '酒醉好题诗', '梅迎跃进春', '箫声向远天', '莫向外头看', '思量枕席功夫', '家庭祥和', '松风如在弦', '这边环境安宁', '醉酒吐真情', '年年有盼头', '碧浪皱红霞', '山深虎迹踪', '衣兴露脐', '民心向党红', '伴梦眠老屋', '禅味涤心胸', '民以食为天', '初日临春虚', '恨别鸟惊心', '朝槿散幽香', '惩凶儆效尤', '梅花落我肩', '陕州人杰灵', '鸟语落花山', '搴舟破晓风', '百岭见千娇', '衣间不带尘', '小曲品三春', '人我法皆空', '何防凿壁偷', '艺高大胆人', '花荣上海人', '天地月常圆', '红雨浸黄云', '敞襟天地宽', '开光佛自由', '时泰喜黎民', '月轮碾古今', '而今当宝存', '春入鸟能言', '偏遇有情人', '心静自然凉', '山转路无穷', '白日奈我何', '春心蝶最知', '千花夹寺门', '无肉也能行', '夜寂鸟啼空', '江涌古今潮', '尝鲜食鱼羊', '烽火起云间', '塞外朔风寒', '巧拙尚相悬', '两手作生涯', '中庭松桂姿', '云外一声钟', '品德讲道德', '秋波我梦吟', '香飘十里风', '莺歌鹧鸪天']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
|
|
"liczba proponowanych wersów: 125\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.008\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 马蹄有韵分平仄\n",
|
|
"poprawny wers drugi:\t 公正不阿辩是非\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['德操应效柏和松', '举帜遵章共展才', '岩上青藤攀壁升', '毅力凝成跨海桥', '峰平径长难藏景', '后乐先忧范弟昆', '美德重红幸福门', '暮忆三秋雁字长', '烟柳风丝拂岸斜', '竹林满山景隽幽', '悦己悦人悦世间', '阔水滔滔有酒仙', '绕岭风清捧玉珠', '木讷的人难启迪', '野渡闲游一叶舟', '日月同辉光景嫣', '可恨蛮牛不识琴', '酒醉不如伴月眠', '烈日巡山果生香', '画就雾云笔墨香', '笔盖古今三千年', '江山犹得助诗豪', '人情世故亦须明', '克俭尚勤播誉名', '闹中取静看擂台', '几管笛箫奏响春', '起棒还将玉宇清', '规矩者应晓方圆', '猴腾广宇绽琼花', '言少言多尽美谈', '茫然回首奈何桥', '对仄对平对友情', '地阔难及贪欲长', '大漠孤烟古道长', '新颖文章秋水清', '璋玉无瑕耀祥光', '花好月圆夜长明', '国泰民安幸福多', '且求秋实果一园', '鹰翔蓝宇戏搏云', '浅草雷门愧下关', '篆铭山石荡浮华', '弄潮帆影港城新', '英年奋进惜时光', '电力惠民百业兴', '千树争高有健才', '梦里飞花静闻香', '梅香葱岭缀长虹', '国运弥盛史弥远', '时急方须济世才', '国展宏图烈士欣', '碧野连天满目春', '明月清风野菊香', '糊口养家望父滩', '五色龙溪抱江流', '笔点涟漪见水平', '动动脑筋动静无', '不叫俗尘污本真', '仰头天幕挂霓虹', '喜报频传战士家', '竹韵梅香总可人', '一指清凉尽染秋', '日移松影过禅床', '小小儿郎立路中', '听竹尤增几许清', '大丈夫能屈能伸', '武夷阳羡品俱馨', '拨弦弹水月偏题', '洞口经春长薜萝', '莫指云山认故乡', '粉黛淡施十五光', '象郡云烟锁桂梧', '雪融春到春融雪', '造客茅庐得孔明', '名享三奇显祖公', '明月来时渚落霜', '常将劲节负秋霜', '阳朔沿水显花荣', '好将长铗护黎民', '映月二泉人世情', '普荫全球亿万生', '枝上空吹故国风', '孔圣有才死后尊', '翠柳清风伴杏娇', '新庆交封暨缅封', '飞鸿远浦一时惊', '风雨人生鉴知音', '三令五申还有贪', '达业欣成万户楼', '重回津渡觅缠绵', '点水蜻蜓赏绿来', '道士身怀童子功', '步步登高上岳阳', '热血沸腾意若何', '对苑繁华万蕾新', '宝地佛临济世人', '得心应手手头宽', '时论同高尺五天', '一捧廉泉岛外春', '防不胜防贼近身', '知耻明荣胸臆宽', '每觉邻山云最多', '路远始于跬步间', '满腔忧愤铸诗魂', '何堪永夜漏更寒', '纵览清江高士怀', '春风惠我也惠人', '小可参禅入几分', '扁舟轻荡水云长', '常教翰墨作鼓吹', '空海星辰宇宙流', '兄弟同吟夜雨陪', '贞慧何辞驻翠颜', '竹韵真箫彻夜吹', '伟雄心志白鹤相知', '正在柳洲接柳风', '文庙弦音奏凯频', '马舞龙韵续华章', '诗心永驻圣洁泉', '山影盘龙月钓珠', '风吹杨柳翠还柔', '关外又开一朵奇葩', '幽梦一帘总是春', '崇廉尚德岛尚书', '大势所趋水如蓝', '何处箫声断客肠', '皓月两轮水面逢', '世事抛开谁为谁', '仙境田园隐棹声', '掷笔从戎壮士名', '夫再礼让妻再争', '秉公执法树廉风', '剌史同游忆月明', '尚德定可净人心', '凝才情血汗磨刀', '骨头坚硬好八连', '江郎梦里得犹神', '黄金灿灿冷如冰', '四海龙兴艺术潮', '腹有奸谋即兽心', '玉鼎沉香影寂寥', '月转疏枝过女墙', '他日凌云傲世间', '百年盟约好时光', '海上风云浪几何', '杜曲幸有桑麻田', '小鸟放歌岁月甜', '竹下新笋一色鲜', '俯首甘为孺子牛', '落蕊黯留一挽香', '踏雪归来鬓染香', '联内音声欠古风', '谁到篱前问姓名', '马上蓝天宇拓宽', '动地惊天事业昌', '山水相依诗易描', '美丽季节万里春', '流觞逸兴写兰亭', '慷慨悲歌魏晋风', '心情更比落花差', '五井丰碑今日游', '傲物诗文有劲风', '死后欣然上八仙', '碧峰犹冷寺前春', '年尾年头送温馨', '诗卷长流天地间', '岁变难更意里人', '深感人情冷似冰', '每到云稠方想伞', '廉政为民常山情', '北海泛舟携孔融', '刻炬成诗韵可观', '早上欣逢笔底兄', '月满楼台鸳梦香', '用心感知世上音', '落花空结水姻缘', '古韵古风誉古今', '处世何须带伪装', '霞飞洞外洞飞霞', 
'隔村香送稻花肥', '翠扇红衣十里香', '丛林花草总痴心', '清风临案窃书魂', '华夏农民开喜镰', '汉宫春风暮烟中', '花到极清始觉香', '变易何难志士心', '闹市俗人涮肥羊', '古木生芽不是春', '红梅傲雪笑枝青', '片段岚光落画屏', '古渡渔翁岁月悠', '天乐鸣时简子游', '碧简须雕次第仙', '浴凫飞鸳晚悠悠', '蛇对赠君东海福', '御世今惟不动尊', '红枣绿茶岭上香', '文章有道拟施行', '羊跃人欢艳阳春', '月色从来未换新', '泠泠若水慧兰馨', '开卷细同贤者谋', '尘梦长随月色清', '转身应把泪珠弹', '水畔青田走马牛', '吉日迎亲有贵人', '名多友多著作多', '峰丘暗许百年情', '诚信经营财路宽', '风急烟轻水送舟', '满腹心言共汝谈', '本祖巍峨八剑堂', '家兴国兴万事兴', '平子归田不为穷', '山水襟怀我不如', '古道雁行倦戏秋', '小院兰亭柳下风', '明月半堂清我心', '两行悠然静临风', '误将弟子入迷宫', '夏种稻田要维家', '岗连高鼎天为峰', '春来江水绿如蓝', '南粤万家景色新', '新朋草舍对趣联', '愁恨两分杨柳风', '绿如李叶亮如霜']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 228\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 山中习静观朝槿\n",
|
|
"poprawny wers drugi:\t 洞口经春长薜萝\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['人如无欲意何求', '举帜遵章共展才', '岩上青藤攀壁升', '问渠嘉树是谁栽', '济助家乡晃美名', '草内多藏五步蛇', '病入膏肓有治疗', '后乐先忧范弟昆', '好同蝉窟映三潭', '美德重红幸福门', '疑是瑶台月下逢', '锦绣春归百姓家', '鼓瑟还从曲里来', '雀跃鱼翔谐乐多', '莺燕对舞艳阳天', '雅文相识好文人', '映日桑榆重晚晴', '暮忆三秋雁字长', '烟柳风丝拂岸斜', '竹林满山景隽幽', '山雨不来仍有风', '悦己悦人悦世间', '阔水滔滔有酒仙', '燕子三双戏柳烟', '豪赌毁他上进心', '绕岭风清捧玉珠', '野渡闲游一叶舟', '含露芙蓉醉海棠', '日月同辉光景嫣', '可恨蛮牛不识琴', '酒醉不如伴月眠', '烈日巡山果生香', '画就雾云笔墨香', '悔被浮名牵累多', '坑我此生此袋烟', '笔盖古今三千年', '克俭尚勤播誉名', '闹中取静看擂台', '几管笛箫奏响春', '千树落花别样红', '扬州十里小红庐', '云本无心醉雨嫣', '起棒还将玉宇清', '言少言多尽美谈', '茫然回首奈何桥', '地阔难及贪欲长', '大漠孤烟古道长', '新颖文章秋水清', '璋玉无瑕耀祥光', '花好月圆夜长明', '国泰民安幸福多', '然乎者一字乾坤', '且求秋实果一园', '南燕离巢北国春', '我犹未脱长康痴', '浅草雷门愧下关', '融月新醅慢慢尝', '篆铭山石荡浮华', '弄潮帆影港城新', '翠竹山花恋我归', '千树争高有健才', '梦里飞花静闻香', '学问无穷博古今', '富裕安康福万家', '国运弥盛史弥远', '时急方须济世才', '祭酒干杯国子光', '国展宏图烈士欣', '卷帘烧烛看梅花', '碧野连天满目春', '明月清风野菊香', '糊口养家望父滩', '披甲拳飞对手逃', '五色龙溪抱江流', '淡淡菊香盈袖中', '笔点涟漪见水平', '动动脑筋动静无', '不叫俗尘污本真', '竹韵梅香总可人', '一指清凉尽染秋', '日移松影过禅床', '小小儿郎立路中', '意气风发马晓春', '涛落沙新畔易留', '德雨润开廉洁花', '胜算亦防失误时', '月老三分秋水寒', '听竹尤增几许清', '大丈夫能屈能伸', '武夷阳羡品俱馨', '弹毕雅曲听和声', '人爱人钦人喜欢', '洞口经春长薜萝', '一川杨柳笼和风', '莫指云山认故乡', '韧节有意杜虚名', '两个老头打秋风', '春梦几枝与醉痴', '粉黛淡施十五光', '草木蔫枯晒绿洲', '落日也将暮色描', '象郡云烟锁桂梧', '潋滟江波扑簌风', '人且清心同步行', '造客茅庐得孔明', '名享三奇显祖公', '明月来时渚落霜', '阳朔沿水显花荣', '好将长铗护黎民', '北海波清映日黄', '兰心未老梦如初', '映月二泉人世情', '世间最难得弟兄', '亭藏绿荫万般幽', '唯有读书声最佳', '枝上空吹故国风', '笑问书生君是谁', '孔圣有才死后尊', '翠柳清风伴杏娇', '新庆交封暨缅封', '飞鸿远浦一时惊', '往事依然笔底新', '湖月高低映绿杨', '风过泸州带酒香', '相思一点老了谁', '风雨人生鉴知音', '小觑浮名对酒歌', '浮舟水面尽飞花', '早出晚归皆自然', '翠袖拂空一抹烟', '却为心肝伤脑筋', '三令五申还有贪', '达业欣成万户楼', '点水蜻蜓赏绿来', '不言第一海胸襟', '重义轻财德道深', '道士身怀童子功', '步步登高上岳阳', '热血沸腾意若何', '对苑繁华万蕾新', '宝地佛临济世人', '时论同高尺五天', '知耻明荣胸臆宽', '每觉邻山云最多', '常想旁通不对头', '一意孤行不回头', '烛影摇红步步娇', '路远始于跬步间', '银烛金杯映翠眉', '江水源源发电来', '纵览清江高士怀', '关羽无停觅长兄', '落木落红落寂生', '小可参禅入几分', '九世同居号义门', '常教翰墨作鼓吹', '空海星辰宇宙流', '小桃几树鸟啼红', '皇城玉阙夕阳斜', '兄弟同吟夜雨陪', '思量枕席功夫', '贞慧何辞驻翠颜', '竹韵真箫彻夜吹', '落日栖霞赏故园', '处世无方只守诚', '正在柳洲接柳风', '一言九鼎定心神', '文庙弦音奏凯频', '却诩心田少欠情', '晓霞含愁看早梅', '山影盘龙月钓珠', '关外又开一朵奇葩', '幽梦一帘总是春', '崇廉尚德岛尚书', '大势所趋水如蓝', '何处箫声断客肠', '专门收拾搞重婚', '皓月两轮水面逢', '良夜清风月满湖', 
'世事抛开谁为谁', '仙境田园隐棹声', '夫再礼让妻再争', '三爱首推书友茶', '黄叶飘零比较烦', '玉液溶溶滴露来', '骨头坚硬好八连', '金声玉振展奇才', '四海龙兴艺术潮', '杨絮舞出风感觉', '腹有奸谋即兽心', '玉鼎沉香影寂寥', '德作福田三世修', '小阁亦存明月身', '室壁裂时蟢网缝', '月转疏枝过女墙', '云压水岸浪逐云', '十里桃花相见欢', '吕布吕蒙常用兵', '他日凌云傲世间', '樱树花开迓客图', '百年盟约好时光', '相思不减病扶墙', '海上风云浪几何', '小鸟放歌岁月甜', '润雨催开草甸花', '静夜遐思枕月眠', '竹下新笋一色鲜', '月窥秋水潜伏人', '俯首甘为孺子牛', '水稻风多不待秋', '圣子甘心为罪囚', '踏雪归来鬓染香', '一旦出名人气高', '联内音声欠古风', '横窗疏影绽梅花', '中散孤高故不凡', '谁到篱前问姓名', '柳叶随风画美图', '马上蓝天宇拓宽', '山水相依诗易描', '美丽季节万里春', '部长铁男不定还', '心若非良必惹悲', '慷慨悲歌魏晋风', '五井丰碑今日游', '傲物诗文有劲风', '对燕双飞碧柳间', '载笔须来阙下游', '无主孤魂百姓怜', '死后欣然上八仙', '杏雨又同桃雨飞', '碧峰犹冷寺前春', '竹叶入唇醉耋龄', '兰桂齐芳福乐门', '白面书生尽奶油', '饮酒月前独自愁', '莺恋柳色月常依', '乍响雷鸣下箸惊', '年尾年头送温馨', '诗卷长流天地间', '岁变难更意里人', '深感人情冷似冰', '烛影摇红夜不眠', '每到云稠方想伞', '廉政为民常山情', '刻炬成诗韵可观', '早上欣逢笔底兄', '月满楼台鸳梦香', '四面荷花扑画船', '虎年缘接冬奥情', '古韵古风誉古今', '两制宏谋百代功', '梦远还托风导游', '处世何须带伪装', '杉木果林桃李荣', '翠扇红衣十里香', '华夏农民开喜镰', '花到极清始觉香', '变易何难志士心', '闹市俗人涮肥羊', '耕月三分播墨花', '燕翅劈开两岸闲', '落草潜伏十字坡', '古渡渔翁岁月悠', '天乐鸣时简子游', '碧简须雕次第仙', '浴凫飞鸳晚悠悠', '落月桐轩寂手谈', '蛇对赠君东海福', '御世今惟不动尊', '红枣绿茶岭上香', '天长日丽艳桃花', '竹色四时也不移', '收聚白露仙人梦', '羊跃人欢艳阳春', '美色必将随后衰', '月色从来未换新', '世事浮云感慨多', '修身多读养心书', '莫愁燕去有回时', '开卷细同贤者谋', '尘梦长随月色清', '转身应把泪珠弹', '积德途中永不停', '水畔青田走马牛', '雅意如茶自在闲', '吉日迎亲有贵人', '一地纸灰寂寞人', '名多友多著作多', '丹翠含悲珠泪流', '老眼欣观四化图', '诚信经营财路宽', '水墨胡涂浪漫稀', '风急烟轻水送舟', '满腹心言共汝谈', '紫燕翻飞柳泛青', '本祖巍峨八剑堂', '家兴国兴万事兴', '花间蕊貌润如珠', '秋叶梧桐扫地僧', '平子归田不为穷', '秋月春风惹梦思', '山水襟怀我不如', '古道雁行倦戏秋', '小院兰亭柳下风', '巢就休询燕几时', '明月半堂清我心', '白虎岭中白虎来', '寄人篱下始知求', '凝注流霜秋九梦', '两行悠然静临风', '误将弟子入迷宫', '河朔膏腴古督亢', '夏种稻田要维家', '岗连高鼎天为峰', '春来江水绿如蓝', '何必杀鸡笑野猴', '南粤万家景色新', '新朋草舍对趣联', '愁恨两分杨柳风', '慎独湖湘第一人', '千载长天起大云', '松麓邀云放月筝', '绿如李叶亮如霜']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: True\n",
|
|
"liczba proponowanych wersów: 324\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0030864197530864196\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 园主意拳拳不惜重金寻国宝\n",
|
|
"poprawny wers drugi:\t 门生情切切敢捐大义铸心碑\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['慈心抒自在手慈眼慈甘露慈', '羊羔跪乳乌鸦哺母且思恩', '陈家颜割落耳朵颜面才是东家', '织天织地织出人间一个家', '一心一意书中可造万般星才', '瑶台丽日扬善弘仁一片天', '满腔诚信长赢福利四时春', '诗联并进渝水巴山起异军', '穷途哭恸阮籍猖狂独咏怀', '杏林栽福地仁心妙术起沉疴', '十年非忘本学子该当底气足', '英灵不昧览兹蹇蹇匪躬愚', '万锋笔健联台宿将舞龙文', '王粲传遣词备悉预须认定大纲', '八方铺锦绣紫燕娇啼羡物华', '一匕图始皇自有我易水悲歌', '待儿曹婚娶莼羹鲈脍慷慨知还', '琼花瑶叶雨浥芙蕖冉冉香', '皖吟风徽歌韵老村美景若诗', '中华娇子红塔山云烟贵烟', '字成一体大戟长枪跌宕书', '人言虽可信但防渭水混泾江', '年年七夕望月观星念恋人', '兴亡在抱百千年史鉴咏冰心', '网间谈情话外有音沁语非', '梅花千万点报得人间锦绣春', '鲲鹏翔瀚宇激越高昂自在身', '江边楼上商女欢讴玉树歌', '玉轮升碧海清辉广照出天然', '壮怀逸兴盛世鸿儒聚鹭园', '只身游燕赵淡泊无定水云舒', '啜甘须忆苦纵登高位犹纳清风', '赤隆扬赤帜九州共庆小康春', '登高极目从兹俗虑自消沉', '云天碧水横练陈江七彩颜', '心描山水情一枝一叶总关情', '心朝北斗祖国万岁路铺金', '纵横三界明察正果自如来', '平台屹屹出水蛟腾碧浪中', '风亦软云亦淡独怜一地月华', '为环球献瑞沧桑洗礼万年冰', '篇篇墨语字字无非寂寞吟', '指告后昆代代永铭国耻激扬', '更漏子蝶恋花千滴满见泪沙流', '剩有渊明趣随宜对秋色持醪', '帆樯蔽日风送筝声多在船', '满园春正好八面和风给力多', '春光照大地九州共绘小康图', '心游翰海叹这般风月似醉似痴', '张皇祖道哀丝豪竹别离间', '新枝染翠嫩柳初舒春色娇', '玉兰生妙境花开十里献诗情', '亭自皇朝建青松擎月可知情', '风和牵细浪托盘荷畔捧玉珠', '欢迎学者此道终须启后人', '城苑真娇育德千秋桃李馨', '对对总求工自对需如互对工', '蟾光初照银桨徐摇万点星', '太白泼墨天上月云石上诗', '东坡曾醉人间天上两婵娟', '山留菩境石鼓一悬梦万年', '归程渺渺涕泪常邀笑梦来', '梅影横窗瘦南枝微弄雪精神', '民生有幸嘣出实心得惠仁', '做戏人看戏人戏内戏人看人', '一琴兼一鹤仰承清献旧家风', '夜立桥上明月不流岁月流', '远镜微镜透镜反光镜常问伯奇', '胸怀税务戮力耕耘收税献丹心', '秉公办事牢记四知品自高', '锤镰记取红色党旗血染成', '秦有十八子笔墨抒意论春秋', '信众安详善念广播皆属真人', '德宏章贡修文悦礼敦古铄今', '蔺廉有隙终对刎颈死生交', '雪绽一树花漫园寒梅点点香', '垂名万古百战功随乃若何', '熄八年烽火侵华历史鉴千秋']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 78\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# For each index in wybrane_dane_testowe: print the verse pair, let the classifier\n",
"# judge every second verse in y_test, then report the accepted verses and the metric.\n",
"for indeks_wersu_pierwszego in wybrane_dane_testowe:\n",
"    wers_pierwszy = testowe[\"in\"][indeks_wersu_pierwszego]\n",
"    poprawny_wers_drugi = testowe[\"out\"][indeks_wersu_pierwszego]\n",
"    print(\"wers pierwszy:\\t\\t\", wers_pierwszy)\n",
"    print(\"poprawny wers drugi:\\t\", poprawny_wers_drugi)\n",
"    print()\n",
"\n",
"    reprezentacja_wersu_pierwszego = x_test[indeks_wersu_pierwszego]\n",
"    # One row per candidate: the first-verse representation followed by the candidate's.\n",
"    # Stacking the rows lets the classifier run once per first verse instead of\n",
"    # once per candidate.\n",
"    wejscia_do_MLP = torch.stack([\n",
"        torch.cat((reprezentacja_wersu_pierwszego, y_test[indeks_wersu_drugiego]))\n",
"        for indeks_wersu_drugiego in range(len(y_test))\n",
"    ])\n",
"    # Hand scikit-learn a plain NumPy array rather than a torch tensor.\n",
"    przewidziane_klasy = klasyfikator.predict(wejscia_do_MLP.detach().cpu().numpy())\n",
"\n",
"    # Keep the candidates the classifier labelled 1.\n",
"    proponowane_wersy = [\n",
"        testowe[\"out\"][indeks_wersu_drugiego]\n",
"        for indeks_wersu_drugiego, przewidziana_klasa in enumerate(przewidziane_klasy)\n",
"        if przewidziana_klasa == 1\n",
"    ]\n",
"    print(\"proponowane drugie wersy:\", proponowane_wersy)\n",
"    print(\"czy poprawny wers jest pośród proponowanych wersów?:\", poprawny_wers_drugi in proponowane_wersy)\n",
"    print(\"liczba proponowanych wersów:\", len(proponowane_wersy))\n",
"    print()\n",
"\n",
"    print(\"wynik przyjętej metryki:\", jagosz_score(poprawny_wers_drugi, proponowane_wersy))\n",
"    print()\n",
"    print(\"-\"*50)\n",
"    print()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "k4-MITYevK4C"
|
|
},
|
|
"source": [
|
|
"## MLPRegressor\n",
|
|
"### Proponuje wszystkie wersy, dla których ocena modelu wynosi co najmniej 0,9."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 132,
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "opt_lWIfvK4C",
|
|
"outputId": "ebacc9bf-8055-4ce2-a0e6-e659a42867f4"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"wers pierwszy:\t\t 仁义自修君子安我\n",
|
|
"poprawny wers drugi:\t 诗礼之教家人利贞\n",
|
|
"\n",
|
|
"proponowane drugie wersy: []\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 0\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"wyjaśnienie - największe wartości przewidywań\n",
|
|
" indeks wartosc\n",
|
|
"275 275 0.698\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 雪落千山静\n",
|
|
"poprawny wers drugi:\t 冰封万水寒\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['思量枕席功夫']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 1\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 莫漫三槐羡王自\n",
|
|
"poprawny wers drugi:\t 须先百忍学张公\n",
|
|
"\n",
|
|
"proponowane drugie wersy: []\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 0\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"wyjaśnienie - największe wartości przewidywań\n",
|
|
" indeks wartosc\n",
|
|
"275 275 0.662909\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 到此酒垆应更好\n",
|
|
"poprawny wers drugi:\t 问渠嘉树是谁栽\n",
|
|
"\n",
|
|
"proponowane drugie wersy: []\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 0\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"wyjaśnienie - największe wartości przewidywań\n",
|
|
" indeks wartosc\n",
|
|
"1317 1317 0.710679\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 漫向楼台寻婉转\n",
|
|
"poprawny wers drugi:\t 重回津渡觅缠绵\n",
|
|
"\n",
|
|
"proponowane drugie wersy: []\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 0\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"wyjaśnienie - największe wartości przewidywań\n",
|
|
" indeks wartosc\n",
|
|
"275 275 0.754231\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 晋水育菩提叶叠千层呈瑞气\n",
|
|
"poprawny wers drugi:\t 玉兰生妙境花开十里献诗情\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['龙传人赞华夏名镇迎东风', '陈家颜割落耳朵颜面才是东家', '一心一意书中可造万般星才', '一僧击暮鼓南无长诵保平安', '挥洒一身才气令岁月流芳', '皖吟风徽歌韵老村美景若诗', '人言虽可信但防渭水混泾江', '兴亡在抱百千年史鉴咏冰心', '神州筑梦四方创业业峥嵘', '梅花千万点报得人间锦绣春', '广府古城百花芳草淹春秋', '终日惟杜门蔬食经卷绳床', '只身游燕赵淡泊无定水云舒', '地铁迎春西咸大道正龙吟', '千家纳福转型跨越晋城兴', '踏渭河潮宝鸡好梦咏春风', '嫩竹舒新绿倚遍春风翠袖寒', '心描山水情一枝一叶总关情', '玩木玩瓷玩玉玩核玩转岁月赏岁月数十年华笑世间不懂', '莺鹂鸣柳恰有南风雁早乘', '汇九霄圣脉犀江溢彩梦园芳', '俗子凡胎从来市井最人情', '做戏人看戏人戏内戏人看人', '远镜微镜透镜反光镜常问伯奇', '龙狮舞彩八方乐奏颂长春', '秦有十八子笔墨抒意论春秋', '月月风风叫你顿首献感情']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 27\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"wyjaśnienie - największe wartości przewidywań\n",
|
|
" indeks wartosc\n",
|
|
"369 369 1.050771\n",
|
|
"876 876 1.034025\n",
|
|
"754 754 1.031349\n",
|
|
"646 646 1.026354\n",
|
|
"1306 1306 1.025188\n",
|
|
"1130 1130 1.021680\n",
|
|
"1374 1374 1.018667\n",
|
|
"48 48 1.016918\n",
|
|
"460 460 1.009228\n",
|
|
"82 82 1.001561\n",
|
|
"744 744 0.984034\n",
|
|
"506 506 0.981010\n",
|
|
"1466 1466 0.972648\n",
|
|
"594 594 0.958953\n",
|
|
"16 16 0.957970\n",
|
|
"1136 1136 0.956761\n",
|
|
"1247 1247 0.955357\n",
|
|
"517 517 0.947808\n",
|
|
"195 195 0.932003\n",
|
|
"639 639 0.930620\n",
|
|
"530 530 0.929867\n",
|
|
"290 290 0.918470\n",
|
|
"687 687 0.908896\n",
|
|
"715 715 0.907384\n",
|
|
"1062 1062 0.907318\n",
|
|
"697 697 0.906565\n",
|
|
"1387 1387 0.903108\n",
|
|
"608 608 0.899049\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 国运逢春好\n",
|
|
"poprawny wers drugi:\t 民心向党红\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['思量枕席功夫']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 1\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 马蹄有韵分平仄\n",
|
|
"poprawny wers drugi:\t 公正不阿辩是非\n",
|
|
"\n",
|
|
"proponowane drugie wersy: []\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 0\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"wyjaśnienie - największe wartości przewidywań\n",
|
|
" indeks wartosc\n",
|
|
"275 275 0.640859\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 山中习静观朝槿\n",
|
|
"poprawny wers drugi:\t 洞口经春长薜萝\n",
|
|
"\n",
|
|
"proponowane drugie wersy: []\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 0\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"wyjaśnienie - największe wartości przewidywań\n",
|
|
" indeks wartosc\n",
|
|
"275 275 0.760342\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n",
|
|
"wers pierwszy:\t\t 园主意拳拳不惜重金寻国宝\n",
|
|
"poprawny wers drugi:\t 门生情切切敢捐大义铸心碑\n",
|
|
"\n",
|
|
"proponowane drugie wersy: ['龙传人赞华夏名镇迎东风', '一僧击暮鼓南无长诵保平安', '皖吟风徽歌韵老村美景若诗', '广府古城百花芳草淹春秋', '龙狮舞彩八方乐奏颂长春']\n",
|
|
"czy poprawny wers jest pośród proponowanych wersów?: False\n",
|
|
"liczba proponowanych wersów: 5\n",
|
|
"\n",
|
|
"wynik przyjętej metryki: 0.0\n",
|
|
"\n",
|
|
"--------------------------------------------------\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# For each index in wybrane_dane_testowe: print the verse pair, score every second\n",
"# verse in y_test with the regressor, and propose those scoring at least 0.9.\n",
"for indeks_wersu_pierwszego in wybrane_dane_testowe:\n",
"    wers_pierwszy = testowe[\"in\"][indeks_wersu_pierwszego]\n",
"    poprawny_wers_drugi = testowe[\"out\"][indeks_wersu_pierwszego]\n",
"    print(\"wers pierwszy:\\t\\t\", wers_pierwszy)\n",
"    print(\"poprawny wers drugi:\\t\", poprawny_wers_drugi)\n",
"    print()\n",
"\n",
"    reprezentacja_wersu_pierwszego = x_test[indeks_wersu_pierwszego]\n",
"    # One row per candidate: the first-verse representation followed by the candidate's.\n",
"    # Stacking the rows lets the regressor run once per first verse instead of\n",
"    # once per candidate.\n",
"    wejscia_do_MLP = torch.stack([\n",
"        torch.cat((reprezentacja_wersu_pierwszego, y_test[indeks_wersu_drugiego]))\n",
"        for indeks_wersu_drugiego in range(len(y_test))\n",
"    ])\n",
"\n",
"    # Row i holds the score of candidate i, so \"indeks\" equals the row label.\n",
"    pom_df = pandas.DataFrame({\n",
"        \"indeks\": range(len(y_test)),\n",
"        \"wartosc\": regresor.predict(wejscia_do_MLP.detach().cpu().numpy()),\n",
"    })\n",
"    # Select the accepted candidates with a boolean mask instead of a per-row lookup.\n",
"    zaakceptowane_indeksy = pom_df.loc[pom_df[\"wartosc\"] >= 0.9, \"indeks\"]\n",
"    proponowane_wersy = [testowe[\"out\"][i] for i in zaakceptowane_indeksy]\n",
"\n",
"    print(\"proponowane drugie wersy:\", proponowane_wersy)\n",
"    print(\"czy poprawny wers jest pośród proponowanych wersów?:\", poprawny_wers_drugi in proponowane_wersy)\n",
"    print(\"liczba proponowanych wersów:\", len(proponowane_wersy))\n",
"    print()\n",
"\n",
"    print(\"wynik przyjętej metryki:\", jagosz_score(poprawny_wers_drugi, proponowane_wersy))\n",
"    # When nothing or more than five verses were proposed, show the top scores:\n",
"    # every accepted candidate plus the best rejected one.\n",
"    if len(proponowane_wersy) < 1 or len(proponowane_wersy) > 5:\n",
"        print(\"wyjaśnienie - największe wartości przewidywań\")\n",
"        print(pom_df.nlargest(len(proponowane_wersy) + 1, \"wartosc\"))\n",
"    print()\n",
"    print(\"-\"*50)\n",
"    print()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "SK0POtR2vK4D"
|
|
},
|
|
"source": [
|
|
"### Przyjęta metryka dla 1/20 zbioru testowego."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 133,
|
|
"metadata": {
|
|
"id": "3s4TfbKsvK4D",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"outputId": "7176ce6c-cb2d-4dae-870f-79b7fa30ed4d"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"0.008174386920980926\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Collect (correct second verse, proposed verses) pairs for part of the test set\n",
"# and print the set-level metric computed by jagosz_score_dla_zbioru.\n",
"krotki = []\n",
"czesc_zbioru_testowego, _ = train_test_split(x_test,test_size=0.95,random_state=42)\n",
"\n",
"# NOTE(review): only the length of the random split is used. The loop scores rows\n",
"# 0..len(czesc_zbioru_testowego)-1 of the test set, not the rows that were sampled.\n",
"# Kept as is because the following cells iterate the same way - confirm the intent.\n",
"for indeks_wersu_pierwszego in range(len(czesc_zbioru_testowego)):\n",
"    poprawny_wers_drugi = testowe[\"out\"][indeks_wersu_pierwszego]\n",
"    reprezentacja_wersu_pierwszego = x_test[indeks_wersu_pierwszego]\n",
"\n",
"    # One row per candidate, so the regressor runs once per first verse instead of\n",
"    # once per candidate.\n",
"    wejscia_do_MLP = torch.stack([\n",
"        torch.cat((reprezentacja_wersu_pierwszego, y_test[indeks_wersu_drugiego]))\n",
"        for indeks_wersu_drugiego in range(len(y_test))\n",
"    ])\n",
"    wartosci_przewidywan_wersu_drugiego = regresor.predict(wejscia_do_MLP.detach().cpu().numpy())\n",
"\n",
"    # Propose every candidate whose score is at least 0.9.\n",
"    proponowane_wersy = [\n",
"        testowe[\"out\"][indeks_wersu_drugiego]\n",
"        for indeks_wersu_drugiego, wartosc in enumerate(wartosci_przewidywan_wersu_drugiego)\n",
"        if wartosc >= 0.9\n",
"    ]\n",
"    krotki.append((poprawny_wers_drugi, proponowane_wersy))\n",
"\n",
"print(jagosz_score_dla_zbioru(krotki))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {
|
|
"id": "wRdNz_BFvK4D"
|
|
},
|
|
"source": [
|
|
"### Średnia metryk dla 1/20 zbioru testowego."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 134,
|
|
"metadata": {
|
|
"id": "xVbgLOvUvK4E",
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"outputId": "71a3adac-7143-44e1-f363-99719e8a4c8a"
|
|
},
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"0.004512548262548263\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"# Compute jagosz_score separately for each evaluated first verse and print the mean.\n",
"jagosz_scores = []\n",
"# NOTE(review): czesc_zbioru_testowego is used only for its length, so this scores\n",
"# rows 0..len(czesc_zbioru_testowego)-1 of the test set - confirm that is intended.\n",
"for indeks_wersu_pierwszego in range(len(czesc_zbioru_testowego)):\n",
"    poprawny_wers_drugi = testowe[\"out\"][indeks_wersu_pierwszego]\n",
"    reprezentacja_wersu_pierwszego = x_test[indeks_wersu_pierwszego]\n",
"\n",
"    # One row per candidate, so the regressor runs once per first verse instead of\n",
"    # once per candidate.\n",
"    wejscia_do_MLP = torch.stack([\n",
"        torch.cat((reprezentacja_wersu_pierwszego, y_test[indeks_wersu_drugiego]))\n",
"        for indeks_wersu_drugiego in range(len(y_test))\n",
"    ])\n",
"    wartosci_przewidywan_wersu_drugiego = regresor.predict(wejscia_do_MLP.detach().cpu().numpy())\n",
"\n",
"    # Propose every candidate whose score is at least 0.9.\n",
"    proponowane_wersy = [\n",
"        testowe[\"out\"][indeks_wersu_drugiego]\n",
"        for indeks_wersu_drugiego, wartosc in enumerate(wartosci_przewidywan_wersu_drugiego)\n",
"        if wartosc >= 0.9\n",
"    ]\n",
"    jagosz_scores.append(jagosz_score(poprawny_wers_drugi, proponowane_wersy))\n",
"\n",
"print(numpy.mean(jagosz_scores))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"source": [
|
"# Show how many rows czesc_zbioru_testowego holds.\n",
"rozmiar_czesci_zbioru_testowego = len(czesc_zbioru_testowego)\n",
"print(rozmiar_czesci_zbioru_testowego)"
|
|
],
|
|
"metadata": {
|
|
"colab": {
|
|
"base_uri": "https://localhost:8080/"
|
|
},
|
|
"id": "MynyTHiUFPEU",
|
|
"outputId": "d3fd2288-464f-4ad7-d5f3-baac564cdbda"
|
|
},
|
|
"execution_count": 135,
|
|
"outputs": [
|
|
{
|
|
"output_type": "stream",
|
|
"name": "stdout",
|
|
"text": [
|
|
"74\n"
|
|
]
|
|
}
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.3"
|
|
},
|
|
"colab": {
|
|
"provenance": [],
|
|
"gpuType": "T4"
|
|
},
|
|
"accelerator": "GPU"
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
} |