{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "HxtCFj1hfXw6"
},
"source": [
"# 0. Instalacja i importowanie modułów"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "enDE5aTIgN-v"
},
"source": [
"##### 0.1. Ogólne"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "D7_8XDfpfH-X"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: tflearn==0.5 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from -r requirements.txt (line 1)) (0.5.0)\n",
"Requirement already satisfied: tensorflow in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from -r requirements.txt (line 2)) (2.4.1)\n",
"Requirement already satisfied: pystempel==1.2 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from -r requirements.txt (line 3)) (1.2.0)\n",
"Requirement already satisfied: six in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tflearn==0.5->-r requirements.txt (line 1)) (1.15.0)\n",
"Requirement already satisfied: Pillow in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tflearn==0.5->-r requirements.txt (line 1)) (8.0.1)\n",
"Requirement already satisfied: numpy in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tflearn==0.5->-r requirements.txt (line 1)) (1.19.2)\n",
"Requirement already satisfied: flatbuffers~=1.12.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (1.12)\n",
"Requirement already satisfied: gast==0.3.3 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (0.3.3)\n",
"Requirement already satisfied: tensorflow-estimator<2.5.0,>=2.4.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (2.4.0)\n",
"Requirement already satisfied: grpcio~=1.32.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (1.32.0)\n",
"Requirement already satisfied: wheel~=0.35 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tensorflow->-r requirements.txt (line 2)) (0.35.1)\n",
"Requirement already satisfied: wrapt~=1.12.1 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (1.12.1)\n",
"Requirement already satisfied: termcolor~=1.1.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (1.1.0)\n",
"Requirement already satisfied: google-pasta~=0.2 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (0.2.0)\n",
"Requirement already satisfied: absl-py~=0.10 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (0.12.0)\n",
"Requirement already satisfied: tensorboard~=2.4 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (2.4.1)\n",
"Requirement already satisfied: astunparse~=1.6.3 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (1.6.3)\n",
"Requirement already satisfied: opt-einsum~=3.3.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (3.3.0)\n",
"Requirement already satisfied: typing-extensions~=3.7.4 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tensorflow->-r requirements.txt (line 2)) (3.7.4.3)\n",
"Requirement already satisfied: h5py~=2.10.0 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tensorflow->-r requirements.txt (line 2)) (2.10.0)\n",
"Requirement already satisfied: keras-preprocessing~=1.1.2 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (1.1.2)\n",
"Requirement already satisfied: protobuf>=3.9.2 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorflow->-r requirements.txt (line 2)) (3.15.6)\n",
"Requirement already satisfied: sortedcontainers in c:\\users\\annad\\anaconda3\\lib\\site-packages (from pystempel==1.2->-r requirements.txt (line 3)) (2.2.2)\n",
"Requirement already satisfied: tqdm in c:\\users\\annad\\anaconda3\\lib\\site-packages (from pystempel==1.2->-r requirements.txt (line 3)) (4.50.2)\n",
"Requirement already satisfied: markdown>=2.6.8 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (3.3.4)\n",
"Requirement already satisfied: requests<3,>=2.21.0 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (2.24.0)\n",
"Requirement already satisfied: google-auth<2,>=1.6.3 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (1.28.0)\n",
"Requirement already satisfied: setuptools>=41.0.0 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (50.3.1.post20201107)\n",
"Requirement already satisfied: werkzeug>=0.11.15 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (1.0.1)\n",
"Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (1.8.0)\n",
"Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (0.4.3)\n",
"Requirement already satisfied: idna<3,>=2.5 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (2020.6.20)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (3.0.4)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\annad\\anaconda3\\lib\\site-packages (from requests<3,>=2.21.0->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (1.25.11)\n",
"Requirement already satisfied: rsa<5,>=3.1.4; python_version >= \"3.6\" in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from google-auth<2,>=1.6.3->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (4.7.2)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from google-auth<2,>=1.6.3->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (0.2.8)\n",
"Requirement already satisfied: cachetools<5.0,>=2.0.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from google-auth<2,>=1.6.3->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (4.2.1)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (1.3.0)\n",
"Requirement already satisfied: pyasn1>=0.1.3 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from rsa<5,>=3.1.4; python_version >= \"3.6\"->google-auth<2,>=1.6.3->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (0.4.8)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in c:\\users\\annad\\appdata\\roaming\\python\\python38\\site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.4->tensorflow->-r requirements.txt (line 2)) (3.1.0)\n",
"Package Version\n",
"---------------------------------- -------------------\n",
"absl-py 0.12.0\n",
"alabaster 0.7.12\n",
"anaconda-client 1.7.2\n",
"anaconda-navigator 1.10.0\n",
"anaconda-project 0.8.3\n",
"argh 0.26.2\n",
"argon2-cffi 20.1.0\n",
"asn1crypto 1.4.0\n",
"astroid 2.4.2\n",
"astropy 4.0.2\n",
"astunparse 1.6.3\n",
"async-generator 1.10\n",
"atomicwrites 1.4.0\n",
"attrs 20.3.0\n",
"autopep8 1.5.4\n",
"Babel 2.8.1\n",
"backcall 0.2.0\n",
"backports.functools-lru-cache 1.6.1\n",
"backports.shutil-get-terminal-size 1.0.0\n",
"backports.tempfile 1.0\n",
"backports.weakref 1.0.post1\n",
"bcrypt 3.2.0\n",
"beautifulsoup4 4.9.3\n",
"bitarray 1.6.1\n",
"bkcharts 0.2\n",
"bleach 3.2.1\n",
"bokeh 2.2.3\n",
"boto 2.49.0\n",
"Bottleneck 1.3.2\n",
"brotlipy 0.7.0\n",
"cachetools 4.2.1\n",
"certifi 2020.6.20\n",
"cffi 1.14.3\n",
"chardet 3.0.4\n",
"click 7.1.2\n",
"cloudpickle 1.6.0\n",
"clyent 1.2.2\n",
"colorama 0.4.4\n",
"comtypes 1.1.7\n",
"conda 4.9.2\n",
"conda-build 3.20.5\n",
"conda-package-handling 1.7.2\n",
"conda-verify 3.4.2\n",
"contextlib2 0.6.0.post1\n",
"cryptography 3.1.1\n",
"cycler 0.10.0\n",
"Cython 0.29.21\n",
"cytoolz 0.11.0\n",
"dask 2.30.0\n",
"decorator 4.4.2\n",
"defusedxml 0.6.0\n",
"diff-match-patch 20200713\n",
"distributed 2.30.1\n",
"docutils 0.16\n",
"entrypoints 0.3\n",
"et-xmlfile 1.0.1\n",
"fastcache 1.1.0\n",
"filelock 3.0.12\n",
"flake8 3.8.4\n",
"Flask 1.1.2\n",
"flatbuffers 1.12\n",
"fsspec 0.8.3\n",
"future 0.18.2\n",
"gast 0.3.3\n",
"gevent 20.9.0\n",
"glob2 0.7\n",
"google-auth 1.28.0\n",
"google-auth-oauthlib 0.4.3\n",
"google-pasta 0.2.0\n",
"greenlet 0.4.17\n",
"grpcio 1.32.0\n",
"h5py 2.10.0\n",
"HeapDict 1.0.1\n",
"html5lib 1.1\n",
"idna 2.10\n",
"imageio 2.9.0\n",
"imagesize 1.2.0\n",
"importlib-metadata 2.0.0\n",
"iniconfig 1.1.1\n",
"intervaltree 3.1.0\n",
"ipykernel 5.3.4\n",
"ipython 7.19.0\n",
"ipython-genutils 0.2.0\n",
"ipywidgets 7.5.1\n",
"isort 5.6.4\n",
"itsdangerous 1.1.0\n",
"jdcal 1.4.1\n",
"jedi 0.17.1\n",
"Jinja2 2.11.2\n",
"joblib 0.17.0\n",
"json5 0.9.5\n",
"jsonschema 3.2.0\n",
"jupyter 1.0.0\n",
"jupyter-client 6.1.7\n",
"jupyter-console 6.2.0\n",
"jupyter-core 4.6.3\n",
"jupyterlab 2.2.6\n",
"jupyterlab-pygments 0.1.2\n",
"jupyterlab-server 1.2.0\n",
"Keras-Preprocessing 1.1.2\n",
"keyring 21.4.0\n",
"kiwisolver 1.3.0\n",
"lazy-object-proxy 1.4.3\n",
"libarchive-c 2.9\n",
"llvmlite 0.34.0\n",
"locket 0.2.0\n",
"lxml 4.6.1\n",
"Markdown 3.3.4\n",
"MarkupSafe 1.1.1\n",
"matplotlib 3.3.2\n",
"mccabe 0.6.1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"menuinst 1.4.16\n",
"mistune 0.8.4\n",
"mkl-fft 1.2.0\n",
"mkl-random 1.1.1\n",
"mkl-service 2.3.0\n",
"mock 4.0.2\n",
"more-itertools 8.6.0\n",
"mpmath 1.1.0\n",
"msgpack 1.0.0\n",
"multipledispatch 0.6.0\n",
"navigator-updater 0.2.1\n",
"nbclient 0.5.1\n",
"nbconvert 6.0.7\n",
"nbformat 5.0.8\n",
"nest-asyncio 1.4.2\n",
"networkx 2.5\n",
"nltk 3.5\n",
"nose 1.3.7\n",
"notebook 6.1.4\n",
"numba 0.51.2\n",
"numexpr 2.7.1\n",
"numpy 1.19.2\n",
"numpydoc 1.1.0\n",
"oauthlib 3.1.0\n",
"olefile 0.46\n",
"openpyxl 3.0.5\n",
"opt-einsum 3.3.0\n",
"packaging 20.4\n",
"pandas 1.1.3\n",
"pandocfilters 1.4.3\n",
"paramiko 2.7.2\n",
"parso 0.7.0\n",
"partd 1.1.0\n",
"path 15.0.0\n",
"pathlib2 2.3.5\n",
"pathtools 0.1.2\n",
"patsy 0.5.1\n",
"pep8 1.7.1\n",
"pexpect 4.8.0\n",
"pickleshare 0.7.5\n",
"Pillow 8.0.1\n",
"pip 20.2.4\n",
"pkginfo 1.6.1\n",
"pluggy 0.13.1\n",
"ply 3.11\n",
"prometheus-client 0.8.0\n",
"prompt-toolkit 3.0.8\n",
"protobuf 3.15.6\n",
"psutil 5.7.2\n",
"py 1.9.0\n",
"pyasn1 0.4.8\n",
"pyasn1-modules 0.2.8\n",
"pycodestyle 2.6.0\n",
"pycosat 0.6.3\n",
"pycparser 2.20\n",
"pycurl 7.43.0.6\n",
"pydocstyle 5.1.1\n",
"pyflakes 2.2.0\n",
"Pygments 2.7.2\n",
"pylint 2.6.0\n",
"PyNaCl 1.4.0\n",
"pyodbc 4.0.0-unsupported\n",
"pyOpenSSL 19.1.0\n",
"pyparsing 2.4.7\n",
"pyreadline 2.1\n",
"pyrsistent 0.17.3\n",
"PySocks 1.7.1\n",
"pystempel 1.2.0\n",
"pytest 0.0.0\n",
"python-dateutil 2.8.1\n",
"python-jsonrpc-server 0.4.0\n",
"python-language-server 0.35.1\n",
"pytz 2020.1\n",
"PyWavelets 1.1.1\n",
"pywin32 227\n",
"pywin32-ctypes 0.2.0\n",
"pywinpty 0.5.7\n",
"PyYAML 5.3.1\n",
"pyzmq 19.0.2\n",
"QDarkStyle 2.8.1\n",
"QtAwesome 1.0.1\n",
"qtconsole 4.7.7\n",
"QtPy 1.9.0\n",
"regex 2020.10.15\n",
"requests 2.24.0\n",
"requests-oauthlib 1.3.0\n",
"rope 0.18.0\n",
"rsa 4.7.2\n",
"Rtree 0.9.4\n",
"ruamel-yaml 0.15.87\n",
"scikit-image 0.17.2\n",
"scikit-learn 0.23.2\n",
"scipy 1.5.2\n",
"seaborn 0.11.0\n",
"Send2Trash 1.5.0\n",
"setuptools 50.3.1.post20201107\n",
"simplegeneric 0.8.1\n",
"singledispatch 3.4.0.3\n",
"sip 4.19.13\n",
"six 1.15.0\n",
"snowballstemmer 2.0.0\n",
"sortedcollections 1.2.1\n",
"sortedcontainers 2.2.2\n",
"soupsieve 2.0.1\n",
"Sphinx 3.2.1\n",
"sphinxcontrib-applehelp 1.0.2\n",
"sphinxcontrib-devhelp 1.0.2\n",
"sphinxcontrib-htmlhelp 1.0.3\n",
"sphinxcontrib-jsmath 1.0.1\n",
"sphinxcontrib-qthelp 1.0.3\n",
"sphinxcontrib-serializinghtml 1.1.4\n",
"sphinxcontrib-websupport 1.2.4\n",
"spyder 4.1.5\n",
"spyder-kernels 1.9.4\n",
"SQLAlchemy 1.3.20\n",
"statsmodels 0.12.0\n",
"sympy 1.6.2\n",
"tables 3.6.1\n",
"tblib 1.7.0\n",
"tensorboard 2.4.1\n",
"tensorboard-plugin-wit 1.8.0\n",
"tensorflow 2.4.1\n",
"tensorflow-estimator 2.4.0\n",
"termcolor 1.1.0\n",
"terminado 0.9.1\n",
"testpath 0.4.4\n",
"tflearn 0.5.0\n",
"threadpoolctl 2.1.0\n",
"tifffile 2020.10.1\n",
"toml 0.10.1\n",
"toolz 0.11.1\n",
"tornado 6.0.4\n",
"tqdm 4.50.2\n",
"traitlets 5.0.5\n",
"typing-extensions 3.7.4.3\n",
"ujson 4.0.1\n",
"unicodecsv 0.14.1\n",
"urllib3 1.25.11\n",
"watchdog 0.10.3\n",
"wcwidth 0.2.5\n",
"webencodings 0.5.1\n",
"Werkzeug 1.0.1\n",
"wheel 0.35.1\n",
"widgetsnbextension 3.5.1\n",
"win-inet-pton 1.1.0\n",
"win-unicode-console 0.5\n",
"wincertstore 0.2\n",
"wrapt 1.12.1\n",
"xlrd 1.2.0\n",
"XlsxWriter 1.3.7\n",
"xlwings 0.20.8\n",
"xlwt 1.3.0\n",
"xmltodict 0.12.0\n",
"yapf 0.30.0\n",
"zict 2.0.0\n",
"zipp 3.4.0\n",
"zope.event 4.5.0\n",
"zope.interface 5.1.2\n"
]
}
],
"source": [
"!pip install -r requirements.txt --user\n",
"!pip list"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "GOGs4hL6fwwK"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From C:\\Users\\annad\\AppData\\Roaming\\Python\\Python38\\site-packages\\tensorflow\\python\\compat\\v2_compat.py:96: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"non-resource variables are not supported in the long term\n",
"curses is not supported on this machine (please install/reinstall curses for an optimal experience)\n"
]
}
],
"source": [
"import numpy as np\n",
"import tflearn\n",
"import tensorflow\n",
"import random\n",
"import json\n",
"import nltk"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Mr0ZD1L2gCWw"
},
"source": [
"##### 0.2. Angielski Stemmer: https://www.nltk.org/_modules/nltk/stem/lancaster.html"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "jy4-9guXgBY3"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package punkt to\n",
"[nltk_data] C:\\Users\\annad\\AppData\\Roaming\\nltk_data...\n",
"[nltk_data] Package punkt is already up-to-date!\n"
]
}
],
"source": [
"nltk.download('punkt')\n",
"from nltk.stem.lancaster import LancasterStemmer\n",
"stemmer_en = LancasterStemmer()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "uPpcNQa_ggUl"
},
"source": [
"##### 0.3. Polski Stemmer **(Docelowy)**: https://pypi.org/project/pystempel/"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "XBpvJXn1gBDi"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading: 92%|██████████████████████████████████████████████████▋ | 2051427/2225192 [00:01<00:00, 1634466.61bytes/s]"
]
}
],
"source": [
"from stempel import StempelStemmer\n",
"\n",
"stemmer_pl = StempelStemmer.default() #.polimorf() #is it better?"
]
},
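{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check of both stemmers (an illustrative sketch added here; the sample words are arbitrary and the exact stems depend on the loaded stemmer tables):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Illustrative only: compare the English and Polish stemmers on sample words\n",
"print(stemmer_en.stem(\"running\"))  #Lancaster stemmer for English\n",
"print(stemmer_pl.stem(\"rozmowy\"))  #Stempel stemmer for Polish"
]
},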
{
"cell_type": "markdown",
"metadata": {
"id": "Lg_3MO_3hQV_"
},
"source": [
"# 1. Załadowanie plików **.json** z bazą słów"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "BzBo1657hn3w"
},
"source": [
"##### 1.1. Docelowa baza słów polskich do nauki modelu (10 rodzajów odp - PL)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jKsIW7hHhepB",
"outputId": "09ba1cb1-bb0e-44ee-9d28-017209902934"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'intents': [{'tag': 'greeting', 'patterns': ['Cześć', 'Elo', 'Jesteś?', 'Hej', 'Dzień dobry', 'Sup', 'Witam', 'Hejka', 'Hej!'], 'responses': ['Cześć!', 'Dobrze Cię widzieć!', 'Hej, w czym mogę pomóc?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['narazie', 'Do zobaczenia', 'Dowidzenia', 'Dobranoc', 'Miłego dnia'], 'responses': ['Do zobaczenia później', 'Mam nadzieję, że później pogadamy', 'Narazie!'], 'context_set': ''}, {'tag': 'age', 'patterns': ['Ile masz lat', 'Ile lat ma Janet', 'Wiek', 'Jak stara jesteś', 'jaki jest twój wiek', 'Wieko', 'urodziny'], 'responses': ['Mam kilka dni', 'Urodziłam się 17.03.2021'], 'context_set': ''}, {'tag': 'name', 'patterns': ['Jak masz na imię', 'Jak Cię zwą?', 'twoje imie?', 'Imie', 'Jak cię nazywać', 'Kim jesteś'], 'responses': ['Możesz mnie nazywać Janet!', 'Jestem Janet', 'Jestem Janet, twój ulubiony chatbot'], 'context_set': ''}, {'tag': 'goout', 'patterns': ['Czy chcesz gdzieś wyjść?', 'zrobimy coś razem?', 'pójdziemy gdzieś razem?'], 'responses': ['Może kiedy indziej', 'Odezwę się latem'], 'context_set': ''}, {'tag': 'doing', 'patterns': ['Co robisz teraz?', 'co słychać', 'jakie masz plany?', 'jak się masz?'], 'responses': ['Gram w grę', 'Słucham muzyki', 'nie twój interes', 'nie mam czasu odpowiadać', ''], 'context_set': ''}, {'tag': 'game', 'patterns': ['a w co grasz?', 'a w co?', 'grasz?', 'jaka gra'], 'responses': ['nie interesuj się', 'a co cię to obchodzi', '...', 'w coś', 'W OSRS'], 'context_set': ''}, {'tag': 'music', 'patterns': ['czego?', 'czego słuchasz?', 'jakiej muzyki?', 'a czego?'], 'responses': ['Starego vinyla z 1995 roku', 'mojego ulubionego setu rejwowego', 'czegoś tam...'], 'context_set': ''}, {'tag': 'angry', 'patterns': ['bo co?', 'dlaczego?', 'jak to?'], 'responses': ['Czas zakończyć rozmowę', 'nie mam na to siły i czasu', 'zostaw mnie w spokoju'], 'context_set': ''}, {'tag': 'why', 'patterns': ['o co chodzi?', 'czemu jesteś zła', 'poczekaj'], 'responses': ['...', 'Nie pisz do mnie więcej', 'Nie lubię Cię'], 'context_set': ''}]}\n"
]
}
],
"source": [
"with open(\"intents_pl.json\", encoding='utf-8') as file:\n",
" data_pl = json.load(file)\n",
"\n",
"print(data_pl)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "g94eHpqshoat"
},
"source": [
"##### 1.2. Skrócona baza słów (4 rodzaje odp - PL)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gJbm_CtRhNOK",
"outputId": "157196fc-6a25-4a70-aca3-9d886c743f6c"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'intents': [{'tag': 'greeting', 'patterns': ['Cześć', 'Elo', 'Jesteś?', 'Hej', 'Dzień dobry', 'Sup'], 'responses': ['Cześć!', 'Dobrze Cię widzieć!', 'Hej, w czym mogę pomóc?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['narazie', 'Do zobaczenia', 'Dowidzenia', 'Dobranoc', 'Miłego dnia'], 'responses': ['Do zobaczenia później', 'Mam nadzieję, że później pogadamy', 'Narazie!'], 'context_set': ''}, {'tag': 'age', 'patterns': ['Ile masz lat', 'Ile lat ma Janet', 'Wiek', 'Jak stara jesteś', 'urodziny'], 'responses': ['Mam kilka dni', 'Urodziłam się 17.03.2021'], 'context_set': ''}, {'tag': 'name', 'patterns': ['Jak masz na imię', 'Jak Cię zwą?', 'twoje imie?', 'Imie', 'Jak cię nazywać', 'Kim jesteś'], 'responses': ['Możesz mnie nazywać Janet!', 'Jestem Janet', 'Jestem Janet, twój ulubiony chatbot'], 'context_set': ''}]}\n"
]
}
],
"source": [
"with open(\"intents_pl_short.json\", encoding='utf-8') as file:\n",
" data_pl_short = json.load(file)\n",
"\n",
"print(data_pl_short)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "HjkIUMy2ho6C"
},
"source": [
"##### 1.3. Testowa baza słów angielskich (6 rodzajów odp - EN)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vW5FyoRqhfIc",
"outputId": "378d8894-9c9c-46be-ade1-b6491f095179"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'How are you', 'Is anyone there?', 'Hello', 'Good day', 'Whats up'], 'responses': ['Hello!', 'Good to see you again!', 'Hi there, how can I help?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['cya', 'See you later', 'Goodbye', 'I am Leaving', 'Have a Good day'], 'responses': ['Sad to see you go :(', 'Talk to you later', 'Goodbye!'], 'context_set': ''}, {'tag': 'age', 'patterns': ['how old', 'how old is tim', 'what is your age', 'how old are you', 'age?'], 'responses': ['I am 18 years old!', '18 years young!'], 'context_set': ''}, {'tag': 'name', 'patterns': ['what is your name', 'what should I call you', 'whats your name?'], 'responses': ['You can call me Tim.', \"I'm Tim!\", \"I'm Tim aka Tech With Tim.\"], 'context_set': ''}, {'tag': 'shop', 'patterns': ['Id like to buy something', 'whats on the menu', 'what do you reccommend?', 'could i get something to eat'], 'responses': ['We sell chocolate chip cookies for $2!', 'Cookies are on the menu!'], 'context_set': ''}, {'tag': 'hours', 'patterns': ['when are you guys open', 'what are your hours', 'hours of operation'], 'responses': ['We are open 7am-4pm Monday-Friday!'], 'context_set': ''}]}\n"
]
}
],
"source": [
"with open(\"intents_en.json\", encoding='utf-8') as file:\n",
" data_en = json.load(file)\n",
"\n",
"print(data_en)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "4BnsOkqqjBlr"
},
"source": [
"# 2. Przygotowanie danych do nauki modelu"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"id": "gy6p55-DjLyY"
},
"outputs": [],
"source": [
"words = []\n",
"labels = []\n",
"docs_x = []\n",
"docs_y = []"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XxZX-JQA5zjL"
},
"source": [
"##### 2.1 Stworzenie tablicy ze wszystkimi możliwymi inputami użytkownika (+ labele)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "nBUKwSr_kVSd"
},
"outputs": [],
"source": [
"for intent in data_pl[\"intents\"]: #Loop over the whole json file\n",
"    for pattern in intent[\"patterns\"]: #loop over every example user input for this intent\n",
"        wrds = nltk.word_tokenize(pattern) #Tokenize every word\n",
"        words.extend(wrds) #Add every single tokenized word\n",
"        docs_x.append(wrds) #Add the whole tokenized sentence\n",
"        docs_y.append(intent[\"tag\"]) #Pattern x corresponds to tag y. Needed to link a sentence with its response type\n",
"\n",
"    if intent[\"tag\"] not in labels:\n",
"        labels.append(intent[\"tag\"]) #Add the tag"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"id": "wOyP5lbikV1e"
},
"outputs": [],
"source": [
"words = [stemmer_pl.stem(w.lower()) for w in words if w not in \"?\"] #stemming -> take each word and bring it to the \"root\" form. Only the stemmed version of the word is important to us\n",
"words = sorted(list(set(words))) #Sorting\n",
"\n",
"labels = sorted(labels) #sorting\n",
"\n",
"training = []\n",
"output = []\n",
"\n",
"out_empty = [0 for _ in range(len(labels))]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Words:\n",
"['!', 'a', 'bo', 'chcieć', 'chodzić', 'co', 'coś', 'czec', 'czy', 'dlaczy', 'dnia', 'do', 'dobranoc', 'dobry', 'dowidzieć', 'dzień', 'elo', 'gra', 'grać', 'gć', 'hej', 'hejka', 'ile', 'imia', 'imie', 'jak', 'jaka', 'jaki', 'janet', 'jest', 'ki', 'lat', 'mieć', 'miłeon', 'muzy', 'na', 'narazie', 'nazywać', 'o', 'plany', 'poczekać', 'pójdziemy', 'raz', 'robić', 'się', 'star', 'supć', 'słuchać', 'słychać', 'teraa', 'to', 'twoj', 'twój', 'ty', 'urodziny', 'w', 'wiek', 'wieko', 'witać', 'wyjść', 'y', 'zobaczyć', 'zrobić', 'zwać', 'zła']\n",
"labels:\n",
"['age', 'angry', 'doing', 'game', 'goodbye', 'goout', 'greeting', 'music', 'name', 'why']\n",
"docs_y:\n",
"['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'age', 'age', 'age', 'age', 'age', 'age', 'age', 'name', 'name', 'name', 'name', 'name', 'name', 'goout', 'goout', 'goout', 'doing', 'doing', 'doing', 'doing', 'game', 'game', 'game', 'game', 'music', 'music', 'music', 'music', 'angry', 'angry', 'angry', 'why', 'why', 'why']\n",
"docs_x:\n",
"[['Cześć'], ['Elo'], ['Jesteś', '?'], ['Hej'], ['Dzień', 'dobry'], ['Sup'], ['Witam'], ['Hejka'], ['Hej', '!'], ['narazie'], ['Do', 'zobaczenia'], ['Dowidzenia'], ['Dobranoc'], ['Miłego', 'dnia'], ['Ile', 'masz', 'lat'], ['Ile', 'lat', 'ma', 'Janet'], ['Wiek'], ['Jak', 'stara', 'jesteś'], ['jaki', 'jest', 'twój', 'wiek'], ['Wieko'], ['urodziny'], ['Jak', 'masz', 'na', 'imię'], ['Jak', 'Cię', 'zwą', '?'], ['twoje', 'imie', '?'], ['Imie'], ['Jak', 'cię', 'nazywać'], ['Kim', 'jesteś'], ['Czy', 'chcesz', 'gdzieś', 'wyjść', '?'], ['zrobimy', 'coś', 'razem', '?'], ['pójdziemy', 'gdzieś', 'razem', '?'], ['Co', 'robisz', 'teraz', '?'], ['co', 'słychać'], ['jakie', 'masz', 'plany', '?'], ['jak', 'się', 'masz', '?'], ['a', 'w', 'co', 'grasz', '?'], ['a', 'w', 'co', '?'], ['grasz', '?'], ['jaka', 'gra'], ['czego', '?'], ['czego', 'słuchasz', '?'], ['jakiej', 'muzyki', '?'], ['a', 'czego', '?'], ['bo', 'co', '?'], ['dlaczego', '?'], ['jak', 'to', '?'], ['o', 'co', 'chodzi', '?'], ['czemu', 'jesteś', 'zła'], ['poczekaj']]\n"
]
}
],
"source": [
"#Podgląd zmiennych\n",
"print(f\"Words:\\n{words}\")\n",
"print(f\"labels:\\n{labels}\")\n",
"print(f\"docs_y:\\n{docs_y}\")\n",
"print(f\"docs_x:\\n{docs_x}\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "WewUeunf5_Za"
},
"source": [
"##### 3.2. Przypisywanie słów do danej kategorii (ie. \"Cześć\" do Greetings)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "1Q43_qtZ6KNP"
},
"source": [
"W przypadku data_pl_short są tylko 4 rodzaje odpowiedzi. \"Cześć\" które zostane przypisane do labela \"greeting\" będzie miało formę końcowego outputu \"1000\" jeżeli label \"greetings\" jest pierwszy do wyboru."
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "doFER5OS7CC_"
},
"source": [
"Warto też dodać, że sieć neuronowa nie przyjmuje teksu. To jest główny powód czemu przypisujemy słowa do kategorii"
]
},
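{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal illustration of this encoding (added for clarity; the 4-label order below is the assumed sorted label list of data_pl_short):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Illustrative sketch only: one-hot encoding of a single label\n",
"example_labels = ['age', 'goodbye', 'greeting', 'name']  #assumed sorted labels of data_pl_short\n",
"one_hot = [1 if lbl == 'greeting' else 0 for lbl in example_labels]\n",
"print(one_hot)  #a 1 only at the position of the 'greeting' label"
]
},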
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "8FDKrjpjkYsE"
},
"outputs": [],
"source": [
"for x, doc in enumerate(docs_x): #Przejście przez wszystkie słowa\n",
" bag =[]\n",
"\n",
2021-03-20 13:39:15 +01:00
" wrds = [stemmer_pl.stem(w).lower() for w in doc] #podział wszystkich słów w danym zdaniu\n",
2021-03-19 00:36:47 +01:00
"\n",
" for w in words:\n",
" if w in wrds:\n",
" bag.append(1) #this word exist\n",
" else:\n",
" bag.append(0) #do not exist\n",
" \n",
" output_row = out_empty[:] #kopia\n",
" output_row[labels.index(docs_y[x])] = 1\n",
"\n",
" training.append(bag) #dodajemy nowe wyrażenie zamienione na ciąg binarny\n",
" output.append(output_row)\n",
"\n",
"training = np.array(training) #Zbiór treningowy\n",
"output = np.array(output) #Zbiór outputów"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cJKUjbkC72-f",
"outputId": "7e2bff96-78ce-49ff-b27b-eee77752228d"
},
"outputs": [
{
"data": {
"text/plain": [
"48"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Loading: 100%|███████████████████████████████████████████████████████| 2225192/2225192 [00:20<00:00, 1720623.81bytes/s]\u001b[A"
]
}
],
"source": [
"len(training) #dla pl_short mamy 44 słowa"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Kx43VDgS7-yN",
"outputId": "4fa6f6fe-dc58-4e76-bb26-38c1784ab79c"
},
"outputs": [
{
"data": {
"text/plain": [
"10"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(output[0]) #Które można przypisać do 4 kategorii"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[0 0 0 ... 0 0 0]\n",
" [0 0 0 ... 0 0 0]\n",
" [0 0 0 ... 0 0 0]\n",
" ...\n",
" [0 0 0 ... 0 0 0]\n",
" [0 0 0 ... 0 0 1]\n",
" [0 0 0 ... 0 0 0]]\n",
"[[0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 0 0 1 0 0 0]\n",
" [0 0 0 0 1 0 0 0 0 0]\n",
" [0 0 0 0 1 0 0 0 0 0]\n",
" [0 0 0 0 1 0 0 0 0 0]\n",
" [0 0 0 0 1 0 0 0 0 0]\n",
" [0 0 0 0 1 0 0 0 0 0]\n",
" [1 0 0 0 0 0 0 0 0 0]\n",
" [1 0 0 0 0 0 0 0 0 0]\n",
" [1 0 0 0 0 0 0 0 0 0]\n",
" [1 0 0 0 0 0 0 0 0 0]\n",
" [1 0 0 0 0 0 0 0 0 0]\n",
" [1 0 0 0 0 0 0 0 0 0]\n",
" [1 0 0 0 0 0 0 0 0 0]\n",
" [0 0 0 0 0 0 0 0 1 0]\n",
" [0 0 0 0 0 0 0 0 1 0]\n",
" [0 0 0 0 0 0 0 0 1 0]\n",
" [0 0 0 0 0 0 0 0 1 0]\n",
" [0 0 0 0 0 0 0 0 1 0]\n",
" [0 0 0 0 0 0 0 0 1 0]\n",
" [0 0 0 0 0 1 0 0 0 0]\n",
" [0 0 0 0 0 1 0 0 0 0]\n",
" [0 0 0 0 0 1 0 0 0 0]\n",
" [0 0 1 0 0 0 0 0 0 0]\n",
" [0 0 1 0 0 0 0 0 0 0]\n",
" [0 0 1 0 0 0 0 0 0 0]\n",
" [0 0 1 0 0 0 0 0 0 0]\n",
" [0 0 0 1 0 0 0 0 0 0]\n",
" [0 0 0 1 0 0 0 0 0 0]\n",
" [0 0 0 1 0 0 0 0 0 0]\n",
" [0 0 0 1 0 0 0 0 0 0]\n",
" [0 0 0 0 0 0 0 1 0 0]\n",
" [0 0 0 0 0 0 0 1 0 0]\n",
" [0 0 0 0 0 0 0 1 0 0]\n",
" [0 0 0 0 0 0 0 1 0 0]\n",
" [0 1 0 0 0 0 0 0 0 0]\n",
" [0 1 0 0 0 0 0 0 0 0]\n",
" [0 1 0 0 0 0 0 0 0 0]\n",
" [0 0 0 0 0 0 0 0 0 1]\n",
" [0 0 0 0 0 0 0 0 0 1]\n",
" [0 0 0 0 0 0 0 0 0 1]]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Loading: 100%|███████████████████████████████████████████████████████| 2225192/2225192 [00:20<00:00, 1634466.61bytes/s]"
]
}
],
"source": [
"print(training)\n",
"print(output)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "yCFKTbjZ12wh"
},
"source": [
"# 3. Model i jego ćwiczenie"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"id": "MDA435sI1-Xl"
},
"outputs": [],
"source": [
"training = np.array(training) #zamiana typu dla sieci neuronowej\n",
"output = np.array(output) #zamiana typu dla sieci neuronowej"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SvBURQCc3PBj"
},
"source": [
"##### 3.1. Stworzenie DLN i inicjacja modelu"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XaQJh1aG2hbj",
"outputId": "80420df0-3a78-4583-9563-2165e968713d"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From C:\\Users\\annad\\AppData\\Roaming\\Python\\Python38\\site-packages\\tflearn\\initializations.py:164: calling TruncatedNormal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Call initializer instance with the dtype argument instead of passing it to the constructor\n"
]
}
],
"source": [
"tensorflow.compat.v1.reset_default_graph() #Reset na wszelki wypadek (w sumie nie wiem czy to jakaś super ważna linijka kodu)\n",
"\n",
"net = tflearn.input_data(shape=[None, len(training[0])]) #Input layer\n",
"net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",
"net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",
"#net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",
"net = tflearn.fully_connected(net, len(output[0]), activation=\"softmax\") #len(output) neurons for output layer + Softmax jako najlepsze wyjście dla tego typu danych\n",
"net = tflearn.regression(net)\n",
"\n",
"model = tflearn.DNN(net)\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Ktd1OcBa3PmQ"
},
"source": [
"##### 3.2. Trening Modelu"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "REzkJL_r2hwl",
"outputId": "7ab2b0c5-944f-4e22-d478-1e35b41f87db"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training Step: 5999 | total loss: \u001b[1m\u001b[32m0.02785\u001b[0m\u001b[0m | time: 0.007s\n",
"| Adam | epoch: 1000 | loss: 0.02785 - acc: 0.9753 -- iter: 40/48\n",
"Training Step: 6000 | total loss: \u001b[1m\u001b[32m0.02583\u001b[0m\u001b[0m | time: 0.008s\n",
"| Adam | epoch: 1000 | loss: 0.02583 - acc: 0.9777 -- iter: 48/48\n",
"--\n",
"INFO:tensorflow:C:\\Users\\annad\\Desktop\\System Dialogowy Janet\\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.\n"
]
}
],
"source": [
"model.fit(training, output, n_epoch=1000, batch_size=8, show_metric=True)\n",
"\n",
"#Zapis Modelu\n",
2021-03-20 13:39:15 +01:00
"model.save(\"model.tflearn\")"
2021-03-19 00:36:47 +01:00
]
},
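{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, the saved weights can be restored later instead of retraining (a small sketch; it assumes `net` and `model` have already been rebuilt exactly as in section 3.1):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Optional: reload the saved weights instead of calling model.fit again\n",
"#Assumes the network definition from section 3.1 exists in the current session\n",
"model.load(\"model.tflearn\")"
]
},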
{
"cell_type": "markdown",
"metadata": {
"id": "G-L6TV_63iYs"
},
"source": [
"# 4. Input Użytkownika"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "c6UvIrWu-a38"
},
"source": [
"##### 4.1 Funkcja **\"bag_of_words(s, words)\"** do stemmowania twojego zdania, i przypisania mu formy binarnej"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "1IQyV1v33lC7"
},
"outputs": [],
"source": [
"def bag_of_words(s, words):\n",
" bag = [0 for _ in range(len(words))]\n",
"\n",
" s_words = nltk.word_tokenize(s)\n",
" s_words = [stemmer_pl.stem(word.lower()) for word in s_words]\n",
"\n",
" for se in s_words:\n",
" for i, w in enumerate(words):\n",
" if w == se:\n",
" bag[i] = 1\n",
" return np.array(bag)"
]
},
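{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick check of **bag_of_words** (an illustrative sketch; the sample sentence is arbitrary and the vector layout depends on the `words` list built in section 2):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Illustrative only: encode a sample sentence and inspect the result\n",
"sample_vec = bag_of_words(\"Hej, jak się masz?\", words)\n",
"print(sample_vec)        #binary vector over the known vocabulary\n",
"print(sample_vec.sum())  #number of known (stemmed) words recognized in the sentence"
]
},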
{
"cell_type": "markdown",
"metadata": {
"id": "rXq-wj-F-5DE"
},
"source": [
"##### 4.2 Funkcja **\"chat()\"** do rozmowy z botem"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"id": "Je6OSZ679-KL"
},
"outputs": [],
"source": [
"def chat():\n",
" print(\"Możesz rozpocząć rozmowę z Botem! (type quit to stop)\")\n",
" while True: #Ciągła rozmowa\n",
" inp = input(\"Ty: \")\n",
" if inp.lower() == \"quit\": #Quit by wyjść z loopa\n",
" break\n",
"\n",
" result = model.predict([bag_of_words(inp,words)]) #Predictowanie przy pomocy wyćwiczonego modelu\n",
" result_index = np.argmax(result)\n",
" tag = labels[result_index]\n",
" \n",
2021-03-20 13:39:15 +01:00
" for tg in data_pl[\"intents\"]: #znalezienie poprawnego tagu do zdania\n",
2021-03-19 00:36:47 +01:00
" if tg['tag'] == tag:\n",
" responses = tg['responses']\n",
" \n",
" print(random.choice(responses)) #Wyprintuj losową odpowiedz z danego zbioru odpowiedzi"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ifvjglbO_SEA"
},
"source": [
"# 5. Rozmowa z botem!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "VZf_aCUM-Amm",
"outputId": "9e3fcf7b-b9b3-47b0-acb5-48214f07f363"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Możesz rozpocząć rozmowę z Botem! (type quit to stop)\n",
"Ty: elo\n",
"Cześć!\n",
"Ty: w co grasz\n",
"W OSRS\n",
"Ty: nara\n",
"Hej, w czym mogę pomóc?\n",
"Ty: narazie\n",
"Narazie!\n",
"Ty: do widzenia\n",
"Mam nadzieję, że później pogadamy\n",
"Ty: dowidzenia\n",
"Narazie!\n",
"Ty: ok\n",
"Hej, w czym mogę pomóc?\n"
]
}
],
"source": [
"chat()"
]
}
],
"metadata": {
"colab": {
"name": "DL_Chatbot_ver_1_0.ipynb",
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 1
}