System_Dialogowy_Janet/.ipynb_checkpoints/DL_Chatbot_ver_1_0-checkpoint.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "HxtCFj1hfXw6"
   },
   "source": [
    "# 0. Instalacja i importowanie modułów"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "enDE5aTIgN-v"
   },
   "source": [
    "##### 0.1. Ogólne"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "D7_8XDfpfH-X"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting tflearn==0.5 (from -r requirements.txt (line 1))\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/e7/3c/0b156d08ef3d4e2a8009ecab2af1ad2e304f6fb99562b6271c68a74a4397/tflearn-0.5.0.tar.gz (107kB)\n",
      "\u001b[K     |████████████████████████████████| 112kB 1.7MB/s eta 0:00:01\n",
      "\u001b[?25hCollecting tensorflow (from -r requirements.txt (line 2))\n",
      "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/70/dc/e8c5e7983866fa4ef3fd619faa35f660b95b01a2ab62b3884f038ccab542/tensorflow-2.4.1-cp37-cp37m-manylinux2010_x86_64.whl (394.3MB)\n",
      "\u001b[K     |█████████████████████▍          | 263.6MB 2.0MB/s eta 0:01:06    |▉                               | 10.3MB 2.0MB/s eta 0:03:11     |██                              | 24.7MB 2.3MB/s eta 0:02:38     |██▏                             | 26.4MB 2.1MB/s eta 0:02:59     |███▍                            | 42.2MB 1.8MB/s eta 0:03:14     |█████████▊                      | 120.4MB 2.7MB/s eta 0:01:42     |██████████▉                     | 133.4MB 2.4MB/s eta 0:01:49     |███████████████▍                | 190.0MB 3.0MB/s eta 0:01:08     |█████████████████               | 209.0MB 2.5MB/s eta 0:01:15     |██████████████████▌             | 227.7MB 2.8MB/s eta 0:01:00     |██████████████████▉             | 232.4MB 2.6MB/s eta 0:01:03     |███████████████████▊            | 242.5MB 3.2MB/s eta 0:00:47     |███████████████████▊            | 242.9MB 3.2MB/s eta 0:00:47"
     ]
    }
   ],
   "source": [
    "!pip install -r requirements.txt --user\n",
    "!pip list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "GOGs4hL6fwwK"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import tflearn\n",
    "import tensorflow\n",
    "import random\n",
    "import json"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Mr0ZD1L2gCWw"
   },
   "source": [
    "##### 0.2. Angielski Stemmer: https://www.nltk.org/_modules/nltk/stem/lancaster.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jy4-9guXgBY3"
   },
   "outputs": [],
   "source": [
    "import nltk\n",
    "\n",
    "nltk.download('punkt')\n",
    "from nltk.stem.lancaster import LancasterStemmer\n",
    "stemmer_en = LancasterStemmer()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "uPpcNQa_ggUl"
   },
   "source": [
    "##### 0.3. Polski Stemmer **(Docelowy)**: https://pypi.org/project/pystempel/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "XBpvJXn1gBDi"
   },
   "outputs": [],
   "source": [
    "from stempel import StempelStemmer\n",
    "\n",
    "stemmer_pl = StempelStemmer.default() #może wersja \".polimorf()\" jest lepsza?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Lg_3MO_3hQV_"
   },
   "source": [
    "# 1. Załadowanie plików **.json** z bazą słów"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "BzBo1657hn3w"
   },
   "source": [
    "##### 1.1. Docelowa baza słów polskich do nauki modelu (10 rodzajów odp - PL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "jKsIW7hHhepB",
    "outputId": "09ba1cb1-bb0e-44ee-9d28-017209902934"
   },
   "outputs": [],
   "source": [
    "with open(\"intents_pl.json\", encoding='utf-8') as file:\n",
    "    data_pl = json.load(file)\n",
    "\n",
    "print(data_pl)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "g94eHpqshoat"
   },
   "source": [
    "##### 1.2. Skrócona baza słów (4 rodzaje odp - PL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "gJbm_CtRhNOK",
    "outputId": "157196fc-6a25-4a70-aca3-9d886c743f6c"
   },
   "outputs": [],
   "source": [
    "with open(\"intents_pl_short.json\", encoding='utf-8') as file:\n",
    "  data_pl_short = json.load(file)\n",
    "\n",
    "print(data_pl_short)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "HjkIUMy2ho6C"
   },
   "source": [
    "##### 1.3. Testowa baza słów angielskich (6 rodzajów odp - EN)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "vW5FyoRqhfIc",
    "outputId": "378d8894-9c9c-46be-ade1-b6491f095179"
   },
   "outputs": [],
   "source": [
    "with open(\"intents_en.json\", encoding='utf-8') as file:\n",
    "  data_en = json.load(file)\n",
    "\n",
    "print(data_en)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "4BnsOkqqjBlr"
   },
   "source": [
    "# 2. Przygotowanie danych do nauki modelu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "gy6p55-DjLyY"
   },
   "outputs": [],
   "source": [
    "words = []\n",
    "labels = []\n",
    "docs_x = []\n",
    "docs_y = []"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "XxZX-JQA5zjL"
   },
   "source": [
    "##### 2.1 Stworzenie tablicy ze wszystkimi możliwymi inputami użytkownika (+ labele)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "nBUKwSr_kVSd"
   },
   "outputs": [],
   "source": [
    "for intent in data_pl_short[\"intents\"]: #Loop przez cały json\n",
    "  for pattern in intent[\"patterns\"]: #loop przez wszystkie możliwe rodzaje przykładowego inputu użytkownika\n",
    "    wrds = nltk.word_tokenize(pattern) #Tokenizing every word\n",
    "    words.extend(wrds) #Add every single tokenized word\n",
    "    docs_x.append(wrds) #Add the whole tokenized sentence\n",
    "    docs_y.append(intent[\"tag\"]) #Pattern x coresponds to the tag y. Potrzebne do ustalenia relacji słowa z odpowiedzią\n",
    "\n",
    "  if intent[\"tag\"] not in labels:\n",
    "    labels.append(intent[\"tag\"]) #Add the tag"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "wOyP5lbikV1e"
   },
   "outputs": [],
   "source": [
    "words = [stemmer_pl.stem(w.lower()) for w in words if w not in \"?\"] #stemming -> take each word and bring it to the \"root\" form. Only the stemmed version of the word is important to us\n",
    "words = sorted(list(set(words))) #Sorting\n",
    "\n",
    "labels = sorted(labels) #sorting\n",
    "\n",
    "training = []\n",
    "output = []\n",
    "\n",
    "out_empty = [0 for _ in range(len(labels))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Podgląd zmiennych\n",
    "print(f\"Words:\\n{words}\")\n",
    "print(f\"labels:\\n{labels}\")\n",
    "print(f\"docs_y:\\n{docs_y}\")\n",
    "print(f\"docs_x:\\n{docs_x}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "WewUeunf5_Za"
   },
   "source": [
    "##### 3.2. Przypisywanie słów do danej kategorii (ie. \"Cześć\" do Greetings)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1Q43_qtZ6KNP"
   },
   "source": [
    "W przypadku data_pl_short są tylko 4 rodzaje odpowiedzi. \"Cześć\" które zostane przypisane do labela \"greeting\" będzie miało formę końcowego outputu \"1000\" jeżeli label \"greetings\" jest pierwszy do wyboru."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "doFER5OS7CC_"
   },
   "source": [
    "Warto też dodać, że sieć neuronowa nie przyjmuje teksu. To jest główny powód czemu przypisujemy słowa do kategorii"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8FDKrjpjkYsE"
   },
   "outputs": [],
   "source": [
    "for x, doc in enumerate(docs_x): #Przejście przez wszystkie słowa\n",
    "  bag =[]\n",
    "\n",
    "  wrds = [stemmer_pl.stem(w) for w in doc] #podział wszystkich słów w danym zdaniu\n",
    "\n",
    "  for w in words:\n",
    "    if w in wrds:\n",
    "      bag.append(1) #this word exist\n",
    "    else:\n",
    "      bag.append(0) #do not exist\n",
    "    \n",
    "  output_row = out_empty[:] #kopia\n",
    "  output_row[labels.index(docs_y[x])] = 1\n",
    "\n",
    "  training.append(bag) #dodajemy nowe wyrażenie zamienione na ciąg binarny\n",
    "  output.append(output_row)\n",
    "\n",
    "training = np.array(training) #Zbiór treningowy\n",
    "output = np.array(output) #Zbiór outputów"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "cJKUjbkC72-f",
    "outputId": "7e2bff96-78ce-49ff-b27b-eee77752228d"
   },
   "outputs": [],
   "source": [
    "len(training) #dla pl_short mamy 44 słowa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Kx43VDgS7-yN",
    "outputId": "4fa6f6fe-dc58-4e76-bb26-38c1784ab79c"
   },
   "outputs": [],
   "source": [
    "len(output[0]) #Które można przypisać do 4 kategorii"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(training)\n",
    "print(output)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "yCFKTbjZ12wh"
   },
   "source": [
    "# 3. Model i jego ćwiczenie"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "MDA435sI1-Xl"
   },
   "outputs": [],
   "source": [
    "training = np.array(training) #zamiana typu dla sieci neuronowej\n",
    "output = np.array(output) #zamiana typu dla sieci neuronowej"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "SvBURQCc3PBj"
   },
   "source": [
    "##### 3.1. Stworzenie DLN i inicjacja modelu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "XaQJh1aG2hbj",
    "outputId": "80420df0-3a78-4583-9563-2165e968713d"
   },
   "outputs": [],
   "source": [
    "tensorflow.compat.v1.reset_default_graph() #Reset na wszelki wypadek (w sumie nie wiem czy to jakaś super ważna linijka kodu)\n",
    "\n",
    "net = tflearn.input_data(shape=[None, len(training[0])]) #Input layer\n",
    "net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",
    "net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",
    "#net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",
    "net = tflearn.fully_connected(net, len(output[0]), activation=\"softmax\") #len(output) neurons for output layer + Softmax jako najlepsze wyjście dla tego typu danych\n",
    "net = tflearn.regression(net)\n",
    "\n",
    "model = tflearn.DNN(net)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Ktd1OcBa3PmQ"
   },
   "source": [
    "##### 3.2. Trening Modelu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "REzkJL_r2hwl",
    "outputId": "7ab2b0c5-944f-4e22-d478-1e35b41f87db"
   },
   "outputs": [],
   "source": [
    "model.fit(training, output, n_epoch=1000, batch_size=8, show_metric=True)\n",
    "\n",
    "#Zapis Modelu\n",
    "#model.save(\"model.tflearn\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "G-L6TV_63iYs"
   },
   "source": [
    "# 4. Input Użytkownika"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "c6UvIrWu-a38"
   },
   "source": [
    "##### 4.1 Funkcja **\"bag_of_words(s, words)\"** do stemmowania twojego zdania, i przypisania mu formy binarnej"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "1IQyV1v33lC7"
   },
   "outputs": [],
   "source": [
    "def bag_of_words(s, words):\n",
    "  bag = [0 for _ in range(len(words))]\n",
    "\n",
    "  s_words = nltk.word_tokenize(s)\n",
    "  s_words = [stemmer_pl.stem(word.lower()) for word in s_words]\n",
    "\n",
    "  for se in s_words:\n",
    "    for i, w in enumerate(words):\n",
    "      if w == se:\n",
    "        bag[i] = 1\n",
    "  return np.array(bag)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "rXq-wj-F-5DE"
   },
   "source": [
    "##### 4.2 Funkcja **\"chat()\"** do rozmowy z botem"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Je6OSZ679-KL"
   },
   "outputs": [],
   "source": [
    "def chat():\n",
    "  print(\"Możesz rozpocząć rozmowę z Botem! (type quit to stop)\")\n",
    "  while True: #Ciągła rozmowa\n",
    "    inp = input(\"Ty: \")\n",
    "    if inp.lower() == \"quit\": #Quit by wyjść z loopa\n",
    "      break\n",
    "\n",
    "    result = model.predict([bag_of_words(inp,words)]) #Predictowanie przy pomocy wyćwiczonego modelu\n",
    "    result_index = np.argmax(result)\n",
    "    tag = labels[result_index]\n",
    "    \n",
    "    for tg in data_pl_short[\"intents\"]: #znalezienie poprawnego tagu do zdania\n",
    "      if tg['tag'] == tag:\n",
    "        responses = tg['responses']\n",
    "      \n",
    "    print(random.choice(responses)) #Wyprintuj losową odpowiedz z danego zbioru odpowiedzi"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ifvjglbO_SEA"
   },
   "source": [
    "# 5. Rozmowa z botem!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "VZf_aCUM-Amm",
    "outputId": "9e3fcf7b-b9b3-47b0-acb5-48214f07f363"
   },
   "outputs": [],
   "source": [
    "chat()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "DL_Chatbot_ver_1_0.ipynb",
   "provenance": [],
   "toc_visible": true
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
requirements analysis update 2021-03-27 12:51:50 +01:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "HxtCFj1hfXw6"`
			`},`
			`"source": [`
			`"# 0. Instalacja i importowanie modułów"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "enDE5aTIgN-v"`
			`},`
			`"source": [`
			`"##### 0.1. Ogólne"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "D7_8XDfpfH-X"`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Collecting tflearn==0.5 (from -r requirements.txt (line 1))\n",`
			`"\u001b[?25l Downloading https://files.pythonhosted.org/packages/e7/3c/0b156d08ef3d4e2a8009ecab2af1ad2e304f6fb99562b6271c68a74a4397/tflearn-0.5.0.tar.gz (107kB)\n",`
			`"\u001b[K \|████████████████████████████████\| 112kB 1.7MB/s eta 0:00:01\n",`
			`"\u001b[?25hCollecting tensorflow (from -r requirements.txt (line 2))\n",`
			`"\u001b[?25l Downloading https://files.pythonhosted.org/packages/70/dc/e8c5e7983866fa4ef3fd619faa35f660b95b01a2ab62b3884f038ccab542/tensorflow-2.4.1-cp37-cp37m-manylinux2010_x86_64.whl (394.3MB)\n",`
			"\u001b[K \|█████████████████████▍ \| 263.6MB 2.0MB/s eta 0:01:06 \|▉ \| 10.3MB 2.0MB/s eta 0:03:11 \|██ \| 24.7MB 2.3MB/s eta 0:02:38 \|██▏ \| 26.4MB 2.1MB/s eta 0:02:59 \|███▍ \| 42.2MB 1.8MB/s eta 0:03:14 \|█████████▊ \| 120.4MB 2.7MB/s eta 0:01:42 \|██████████▉ \| 133.4MB 2.4MB/s eta 0:01:49 \|███████████████▍ \| 190.0MB 3.0MB/s eta 0:01:08 \|█████████████████ \| 209.0MB 2.5MB/s eta 0:01:15 \|██████████████████▌ \| 227.7MB 2.8MB/s eta 0:01:00 \|██████████████████▉ \| 232.4MB 2.6MB/s eta 0:01:03 \|███████████████████▊ \| 242.5MB 3.2MB/s eta 0:00:47 \|███████████████████▊ \| 242.9MB 3.2MB/s eta 0:00:47"
			`]`
			`}`
			`],`
			`"source": [`
			`"!pip install -r requirements.txt --user\n",`
			`"!pip list"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "GOGs4hL6fwwK"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"import numpy as np\n",`
			`"import tflearn\n",`
			`"import tensorflow\n",`
			`"import random\n",`
			`"import json"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "Mr0ZD1L2gCWw"`
			`},`
			`"source": [`
			`"##### 0.2. Angielski Stemmer: https://www.nltk.org/_modules/nltk/stem/lancaster.html"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "jy4-9guXgBY3"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"import nltk\n",`
			`"\n",`
			`"nltk.download('punkt')\n",`
			`"from nltk.stem.lancaster import LancasterStemmer\n",`
			`"stemmer_en = LancasterStemmer()"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "uPpcNQa_ggUl"`
			`},`
			`"source": [`
			`"##### 0.3. Polski Stemmer (Docelowy): https://pypi.org/project/pystempel/"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "XBpvJXn1gBDi"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"from stempel import StempelStemmer\n",`
			`"\n",`
			`"stemmer_pl = StempelStemmer.default() #może wersja \".polimorf()\" jest lepsza?"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "Lg_3MO_3hQV_"`
			`},`
			`"source": [`
			`"# 1. Załadowanie plików .json z bazą słów"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "BzBo1657hn3w"`
			`},`
			`"source": [`
			`"##### 1.1. Docelowa baza słów polskich do nauki modelu (10 rodzajów odp - PL)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "jKsIW7hHhepB",`
			`"outputId": "09ba1cb1-bb0e-44ee-9d28-017209902934"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"with open(\"intents_pl.json\", encoding='utf-8') as file:\n",`
			`" data_pl = json.load(file)\n",`
			`"\n",`
			`"print(data_pl)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "g94eHpqshoat"`
			`},`
			`"source": [`
			`"##### 1.2. Skrócona baza słów (4 rodzaje odp - PL)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "gJbm_CtRhNOK",`
			`"outputId": "157196fc-6a25-4a70-aca3-9d886c743f6c"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"with open(\"intents_pl_short.json\", encoding='utf-8') as file:\n",`
			`" data_pl_short = json.load(file)\n",`
			`"\n",`
			`"print(data_pl_short)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "HjkIUMy2ho6C"`
			`},`
			`"source": [`
			`"##### 1.3. Testowa baza słów angielskich (6 rodzajów odp - EN)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "vW5FyoRqhfIc",`
			`"outputId": "378d8894-9c9c-46be-ade1-b6491f095179"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"with open(\"intents_en.json\", encoding='utf-8') as file:\n",`
			`" data_en = json.load(file)\n",`
			`"\n",`
			`"print(data_en)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "4BnsOkqqjBlr"`
			`},`
			`"source": [`
			`"# 2. Przygotowanie danych do nauki modelu"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "gy6p55-DjLyY"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"words = []\n",`
			`"labels = []\n",`
			`"docs_x = []\n",`
			`"docs_y = []"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "XxZX-JQA5zjL"`
			`},`
			`"source": [`
			`"##### 2.1 Stworzenie tablicy ze wszystkimi możliwymi inputami użytkownika (+ labele)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "nBUKwSr_kVSd"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"for intent in data_pl_short[\"intents\"]: #Loop przez cały json\n",`
			`" for pattern in intent[\"patterns\"]: #loop przez wszystkie możliwe rodzaje przykładowego inputu użytkownika\n",`
			`" wrds = nltk.word_tokenize(pattern) #Tokenizing every word\n",`
			`" words.extend(wrds) #Add every single tokenized word\n",`
			`" docs_x.append(wrds) #Add the whole tokenized sentence\n",`
			`" docs_y.append(intent[\"tag\"]) #Pattern x coresponds to the tag y. Potrzebne do ustalenia relacji słowa z odpowiedzią\n",`
			`"\n",`
			`" if intent[\"tag\"] not in labels:\n",`
			`" labels.append(intent[\"tag\"]) #Add the tag"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "wOyP5lbikV1e"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"words = [stemmer_pl.stem(w.lower()) for w in words if w not in \"?\"] #stemming -> take each word and bring it to the \"root\" form. Only the stemmed version of the word is important to us\n",`
			`"words = sorted(list(set(words))) #Sorting\n",`
			`"\n",`
			`"labels = sorted(labels) #sorting\n",`
			`"\n",`
			`"training = []\n",`
			`"output = []\n",`
			`"\n",`
			`"out_empty = [0 for _ in range(len(labels))]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"#Podgląd zmiennych\n",`
			`"print(f\"Words:\\n{words}\")\n",`
			`"print(f\"labels:\\n{labels}\")\n",`
			`"print(f\"docs_y:\\n{docs_y}\")\n",`
			`"print(f\"docs_x:\\n{docs_x}\")"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "WewUeunf5_Za"`
			`},`
			`"source": [`
			`"##### 3.2. Przypisywanie słów do danej kategorii (ie. \"Cześć\" do Greetings)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "1Q43_qtZ6KNP"`
			`},`
			`"source": [`
			`"W przypadku data_pl_short są tylko 4 rodzaje odpowiedzi. \"Cześć\" które zostane przypisane do labela \"greeting\" będzie miało formę końcowego outputu \"1000\" jeżeli label \"greetings\" jest pierwszy do wyboru."`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "doFER5OS7CC_"`
			`},`
			`"source": [`
			`"Warto też dodać, że sieć neuronowa nie przyjmuje teksu. To jest główny powód czemu przypisujemy słowa do kategorii"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "8FDKrjpjkYsE"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"for x, doc in enumerate(docs_x): #Przejście przez wszystkie słowa\n",`
			`" bag =[]\n",`
			`"\n",`
			`" wrds = [stemmer_pl.stem(w) for w in doc] #podział wszystkich słów w danym zdaniu\n",`
			`"\n",`
			`" for w in words:\n",`
			`" if w in wrds:\n",`
			`" bag.append(1) #this word exist\n",`
			`" else:\n",`
			`" bag.append(0) #do not exist\n",`
			`" \n",`
			`" output_row = out_empty[:] #kopia\n",`
			`" output_row[labels.index(docs_y[x])] = 1\n",`
			`"\n",`
			`" training.append(bag) #dodajemy nowe wyrażenie zamienione na ciąg binarny\n",`
			`" output.append(output_row)\n",`
			`"\n",`
			`"training = np.array(training) #Zbiór treningowy\n",`
			`"output = np.array(output) #Zbiór outputów"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "cJKUjbkC72-f",`
			`"outputId": "7e2bff96-78ce-49ff-b27b-eee77752228d"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"len(training) #dla pl_short mamy 44 słowa"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "Kx43VDgS7-yN",`
			`"outputId": "4fa6f6fe-dc58-4e76-bb26-38c1784ab79c"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"len(output[0]) #Które można przypisać do 4 kategorii"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"print(training)\n",`
			`"print(output)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "yCFKTbjZ12wh"`
			`},`
			`"source": [`
			`"# 3. Model i jego ćwiczenie"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "MDA435sI1-Xl"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"training = np.array(training) #zamiana typu dla sieci neuronowej\n",`
			`"output = np.array(output) #zamiana typu dla sieci neuronowej"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "SvBURQCc3PBj"`
			`},`
			`"source": [`
			`"##### 3.1. Stworzenie DLN i inicjacja modelu"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "XaQJh1aG2hbj",`
			`"outputId": "80420df0-3a78-4583-9563-2165e968713d"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"tensorflow.compat.v1.reset_default_graph() #Reset na wszelki wypadek (w sumie nie wiem czy to jakaś super ważna linijka kodu)\n",`
			`"\n",`
			`"net = tflearn.input_data(shape=[None, len(training[0])]) #Input layer\n",`
			`"net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",`
			`"net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",`
			`"#net = tflearn.fully_connected(net, 8) #8 neurons for hidden layer\n",`
			`"net = tflearn.fully_connected(net, len(output[0]), activation=\"softmax\") #len(output) neurons for output layer + Softmax jako najlepsze wyjście dla tego typu danych\n",`
			`"net = tflearn.regression(net)\n",`
			`"\n",`
			`"model = tflearn.DNN(net)\n",`
			`"\n"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "Ktd1OcBa3PmQ"`
			`},`
			`"source": [`
			`"##### 3.2. Trening Modelu"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "REzkJL_r2hwl",`
			`"outputId": "7ab2b0c5-944f-4e22-d478-1e35b41f87db"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"model.fit(training, output, n_epoch=1000, batch_size=8, show_metric=True)\n",`
			`"\n",`
			`"#Zapis Modelu\n",`
			`"#model.save(\"model.tflearn\")"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "G-L6TV_63iYs"`
			`},`
			`"source": [`
			`"# 4. Input Użytkownika"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "c6UvIrWu-a38"`
			`},`
			`"source": [`
			`"##### 4.1 Funkcja \"bag_of_words(s, words)\" do stemmowania twojego zdania, i przypisania mu formy binarnej"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "1IQyV1v33lC7"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"def bag_of_words(s, words):\n",`
			`" bag = [0 for _ in range(len(words))]\n",`
			`"\n",`
			`" s_words = nltk.word_tokenize(s)\n",`
			`" s_words = [stemmer_pl.stem(word.lower()) for word in s_words]\n",`
			`"\n",`
			`" for se in s_words:\n",`
			`" for i, w in enumerate(words):\n",`
			`" if w == se:\n",`
			`" bag[i] = 1\n",`
			`" return np.array(bag)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "rXq-wj-F-5DE"`
			`},`
			`"source": [`
			`"##### 4.2 Funkcja \"chat()\" do rozmowy z botem"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"id": "Je6OSZ679-KL"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"def chat():\n",`
			`" print(\"Możesz rozpocząć rozmowę z Botem! (type quit to stop)\")\n",`
			`" while True: #Ciągła rozmowa\n",`
			`" inp = input(\"Ty: \")\n",`
			`" if inp.lower() == \"quit\": #Quit by wyjść z loopa\n",`
			`" break\n",`
			`"\n",`
			`" result = model.predict([bag_of_words(inp,words)]) #Predictowanie przy pomocy wyćwiczonego modelu\n",`
			`" result_index = np.argmax(result)\n",`
			`" tag = labels[result_index]\n",`
			`" \n",`
			`" for tg in data_pl_short[\"intents\"]: #znalezienie poprawnego tagu do zdania\n",`
			`" if tg['tag'] == tag:\n",`
			`" responses = tg['responses']\n",`
			`" \n",`
			`" print(random.choice(responses)) #Wyprintuj losową odpowiedz z danego zbioru odpowiedzi"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {`
			`"id": "ifvjglbO_SEA"`
			`},`
			`"source": [`
			`"# 5. Rozmowa z botem!"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {`
			`"colab": {`
			`"base_uri": "https://localhost:8080/"`
			`},`
			`"id": "VZf_aCUM-Amm",`
			`"outputId": "9e3fcf7b-b9b3-47b0-acb5-48214f07f363"`
			`},`
			`"outputs": [],`
			`"source": [`
			`"chat()"`
			`]`
			`}`
			`],`
			`"metadata": {`
			`"colab": {`
			`"name": "DL_Chatbot_ver_1_0.ipynb",`
			`"provenance": [],`
			`"toc_visible": true`
			`},`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.7.3"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 1`
			`}`