{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "completed-luxury", "metadata": {}, "outputs": [], "source": [ "%%writefile ./grammar/hello.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar hello;\n", "\n", "public = ;\n", "\n", " = czesc | hej | witaj | hey | hello | dzień dobry | siema | siemanko;" ] }, { "cell_type": "code", "execution_count": 26, "id": "stable-teacher", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./grammar/repertuar.jsgf\n" ] } ], "source": [ "%%writefile ./grammar/repertuar.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar repertuar;\n", "\n", "public = [aktualny | obecny | aktualnie | obecnie | teraz] [repertuar];\n", "\n", " = prosze podac | podaj | jaki jest | co [teraz] gracie | co leci | jakie sa filmy | jakie filmy gracie;\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "broken-typing", "metadata": {}, "outputs": [], "source": [ "%%writefile ./grammar/cancel.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar cancel;\n", "\n", "public = ;\n", "\n", " = chce | chcialbym | chcialabym | prosze;\n", "\n", " = odwolac | zrezygnowac | anulowac ([bilety]|[bilet]|[z biletow]|[rezerwacje]|[z rezerwacji]);" ] }, { "cell_type": "code", "execution_count": 25, "id": "moving-dictionary", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./grammar/book.jsgf\n" ] } ], "source": [ "%%writefile ./grammar/book.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar book;\n", "\n", "public = [] [] [] [];\n", "\n", " = (chce | chcialbym | chcialabym | poprosze) [zarezerwowac];\n", "\n", " = {ilosc} (bilety | biletow | bilet);\n", "\n", " = jeden | dwa | trzy | cztery | piec | szesc | siedem | osiem | dziewiec | dziesiec |1|2|3|4|5|6|7|8|9|10;\n", "\n", " = na [film] {tytul};\n", "\n", " = Batman | Batmana | Uncharted | Pitbull | Ambulans | Bunkier strachu | Corka | Corke | Inni ludzie | Śmierć na Nilu | Skarb Mikołajka;\n", "\n", " = na {dzien};\n", "\n", " = (dzisiaj | jutro | poniedziałek | wtorek | srode | czwartek | piatek | sobotę | niedziele) | ;\n", "\n", " = (8|9|10|11|12|13|14|15) [czerwca]; \n", "\n", " = na [godzinę] {godzina};\n", "\n", " = [];\n", "\n", " = 10|11|12|13|14|15|16|17|18|19|20|21|22|23 | dziesiata | jedenasta | dwunasta | trzynasta | czternasta | pietnasta | szesnasta | siedemnasta | osiemnasta | dziewietnasta | dwudziesta;\n", "\n", " = pietnaście | trzydzieści | czterdziesci piec| 15 | 30 | 45;" ] }, { "cell_type": "code", "execution_count": 15, "id": "democratic-vietnamese", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./grammar/ilosc_bil.jsgf\n" ] } ], "source": [ "%%writefile ./grammar/ilosc_bil.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar ilosc_bil;\n", "\n", "public = [] [];\n", "\n", " = (chce | chcialbym | chcialabym | poprosze) [zarezerwowac];\n", "\n", " = {ilosc} [(bilety | biletow | bilet)];\n", "\n", " = jeden | dwa | trzy | cztery | piec | szesc | siedem | osiem | dziewiec | dziesiec |1|2|3|4|5|6|7|8|9|10;\n" ] }, { "cell_type": "code", "execution_count": 24, "id": "valid-provincial", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./grammar/tyt_filmu.jsgf\n" ] } ], "source": [ "%%writefile ./grammar/tyt_filmu.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar tyt_filmu;\n", "\n", "public = [] [];\n", "\n", " = (chce | chcialbym | chcialabym | poprosze) [zarezerwowac];\n", "\n", " = [na] [film] {tytul};\n", "\n", " = Batman | Batmana | Uncharted | Pitbull | Ambulans | Bunkier strachu | Corka | Corke | Inni ludzie | Śmierć na Nilu | Skarb Mikołajka;" ] }, { "cell_type": "code", "execution_count": 23, "id": "declared-vessel", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./grammar/dni.jsgf\n" ] } ], "source": [ "%%writefile ./grammar/dni.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar dni;\n", "\n", "public = [] [];\n", "\n", " = (chce | chcialbym | chcialabym | poprosze) [zarezerwowac];\n", "\n", " = na {dzien};\n", "\n", " = (dzisiaj | jutro | poniedziałek | wtorek | srode | czwartek | piatek | sobotę | niedziele) | ;\n", "\n", " = (8|9|10|11|12|13|14|15) [czerwca]; \n" ] }, { "cell_type": "code", "execution_count": 21, "id": "animated-guarantee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./grammar/godziny.jsgf\n" ] } ], "source": [ "%%writefile ./grammar/godziny.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar godziny;\n", "\n", "public = [] [];\n", "\n", " = (chce | chcialbym | chcialabym | poprosze) [zarezerwowac];\n", "\n", " = na [godzinę] {godzina};\n", "\n", " = [];\n", "\n", " = 10|11|12|13|14|15|16|17|18|19|20|21|22|23 | dziesiata | jedenasta | dwunasta | trzynasta | czternasta | pietnasta | szesnasta | siedemnasta | osiemnasta | dziewietnasta | dwudziesta;\n", "\n", " = pietnaście | trzydzieści | czterdziesci piec| 15 | 30 | 45;" ] }, { "cell_type": "code", "execution_count": 7, "id": "registered-product", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./grammar/miejsca.jsgf\n" ] } ], "source": [ "%%writefile ./grammar/miejsca.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar miejsca;\n", "\n", "public = [z | na | w] {miejsce};\n", "\n", " = dole | gorze | srodku | tylu | blizej | przodu;\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "neutral-thumbnail", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting ./grammar/nr_tel.jsgf\n" ] } ], "source": [ "%%writefile ./grammar/nr_tel.jsgf\n", "#JSGF V1.0 UTF-8 pl;\n", "\n", "grammar numer;\n", "\n", "public = {numer};\n", "\n", " = 123456789 | 123123123 | 123456123;\n" ] }, { "cell_type": "code", "execution_count": null, "id": "confirmed-quantum", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 9, "id": "looking-alias", "metadata": {}, "outputs": [], "source": [ "import jsgf\n", "from os import listdir\n", "from os.path import isfile, join\n", "\n", "mypath = \"./grammar/\"\n", "onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]\n", "\n", "grammars = []\n", "\n", "for grammarFile in onlyfiles:\n", " grammar = jsgf.parse_grammar_file(mypath + grammarFile)\n", " grammars.append(grammar)\n", " \n", "\n", "def get_dialog_act(rule):\n", " slots = []\n", " get_slots(rule.expansion, slots)\n", " return {'act': rule.grammar.name, 'slots': slots}\n", "\n", "def get_slots(expansion, slots):\n", " if expansion.tag != '':\n", " slots.append((expansion.tag, expansion.current_match))\n", " return\n", "\n", " for child in expansion.children:\n", " get_slots(child, slots)\n", "\n", " if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):\n", " get_slots(expansion.referenced_rule.expansion, slots)\n", "\n", "def nlu(utterance):\n", " matched = None\n", " for grammar in grammars:\n", " matched = grammar.find_matching_rules(utterance)\n", " if matched:\n", " break\n", "\n", " if matched:\n", " return get_dialog_act(matched[0])\n", " else:\n", " return {'act': 'null', 'slots': []}\n", "\n", " \n", "def ajn(text):\n", " frame = nlu(text)\n", " return frame\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "statutory-falls", "metadata": {}, "outputs": [], "source": [ "ajn = Ajn()" ] }, { "cell_type": "code", "execution_count": 5, "id": "through-function", "metadata": {}, "outputs": [ { "data": { "text/plain": [ ">" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ajn.nlu" ] }, { "cell_type": "code", "execution_count": 7, "id": "fantastic-yemen", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "nlu() takes 1 positional argument but 2 were given", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0majn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnlu\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Chcialabym zarezerwowac 10 biletow na film corke na 16 45'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mTypeError\u001b[0m: nlu() takes 1 positional argument but 2 were given" ] } ], "source": [ "result = ajn.nlu('Chcialabym zarezerwowac 10 biletow na film corke na 16 45')\n", "result" ] }, { "cell_type": "code", "execution_count": null, "id": "chemical-athens", "metadata": {}, "outputs": [], "source": [ "!jupyter nbconvert --to script MST.ipynb" ] }, { "cell_type": "code", "execution_count": null, "id": "quantitative-proposition", "metadata": {}, "outputs": [], "source": [ "!jupyter nbconvert --to script AJN_final.ipynb" ] }, { "cell_type": "code", "execution_count": null, "id": "based-action", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 1, "id": "cosmetic-beijing", "metadata": {}, "outputs": [], "source": [ "import jsgf\n", "from os import listdir\n", "from os.path import isfile, join\n", "\n", "mypath = \"./grammar/\"\n", "onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]\n", "\n", "grammars = []\n", "\n", "for grammarFile in onlyfiles:\n", " grammar = jsgf.parse_grammar_file(mypath + grammarFile)\n", " grammars.append(grammar)\n", "\n", " \n", " \n", "def get_dialog_act(rule):\n", " slots = []\n", " get_slots(rule.expansion, slots)\n", " return {'act': rule.grammar.name, 'slots': slots}\n", "\n", "def get_slots(expansion, slots):\n", " if expansion.tag != '':\n", " slots.append((expansion.tag, expansion.current_match))\n", " return\n", "\n", " for child in expansion.children:\n", " get_slots(child, slots)\n", "\n", " if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):\n", " get_slots(expansion.referenced_rule.expansion, slots)\n", "\n", "def nlu(utterance):\n", " matched = None\n", " for grammar in grammars:\n", " matched = grammar.find_matching_rules(utterance)\n", " if matched:\n", " break\n", "\n", " if matched:\n", " return get_dialog_act(matched[0])\n", " else:\n", " return {'act': 'null', 'slots': []}\n", " \n", "res = nlu('chcialbym zarezerwowac 2 bilety na corke na 16')" ] }, { "cell_type": "code", "execution_count": null, "id": "detected-crowd", "metadata": {}, "outputs": [], "source": [ "ajn = Ajn()" ] }, { "cell_type": "code", "execution_count": 2, "id": "technological-applicant", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'act': 'book',\n", " 'slots': [('ilosc', '2'),\n", " ('tytul', 'Corke'),\n", " ('dzien', None),\n", " ('godzina', '16')]}" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res" ] }, { "cell_type": "code", "execution_count": null, "id": "acknowledged-measurement", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 3, "id": "living-reservoir", "metadata": {}, "outputs": [], "source": [ "\n", "class Dst:\n", " \n", " def __init__(self):\n", " self.messages = []\n", " self.checklist = {\n", " \"ilosc\": None,\n", " \"tytul\": None,\n", " \"dzien\": None,\n", " \"godzina\": None\n", " }\n", " self.history = []\n", " \n", " def store(self, message):\n", " self.messages.append(message)\n", "\n", " def get_messages(self):\n", " return self.messages\n", "\n", " def get_next_question(self):\n", " for key, value in self.checklist.items():\n", " if value == None:\n", " return key\n", "\n", " def save_answer(self, slots):\n", " for slot in slots:\n", " self.checklist[slot[0]] = slot[1]\n", "\n", " self.messages.append(slots)\n", " \n", " def update(self, user_act=None):\n", " for intent, domain, slot, value in user_act:\n", " domain = domain.lower()\n", " intent = intent.lower()\n", " slot = slot.lower()\n", " print(domain,intent,slot)" ] }, { "cell_type": "code", "execution_count": 4, "id": "organizational-overhead", "metadata": {}, "outputs": [], "source": [ "dst = Dst()" ] }, { "cell_type": "code", "execution_count": 5, "id": "flying-preliminary", "metadata": {}, "outputs": [], "source": [ "dst.save_answer(res['slots'])" ] }, { "cell_type": "code", "execution_count": 6, "id": "effective-arbitration", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('ilosc', '2'), ('tytul', 'Corke'), ('dzien', None), ('godzina', '16')]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['slots']" ] }, { "cell_type": "code", "execution_count": 7, "id": "warming-oracle", "metadata": {}, "outputs": [], "source": [ "next_question = dst.get_next_question()" ] }, { "cell_type": "code", "execution_count": 8, "id": "immune-conducting", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'ilosc': '2', 'tytul': 'Corke', 'dzien': None, 'godzina': '16'}\n" ] } ], "source": [ "print(dst.checklist)" ] }, { "cell_type": "code", "execution_count": null, "id": "therapeutic-ending", "metadata": {}, "outputs": [], "source": [ "user_act" ] }, { "cell_type": "code", "execution_count": null, "id": "clinical-removal", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "elder-complexity", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dzien\n" ] } ], "source": [ "next_question = dst.get_next_question()\n", "while (next_question):\n", " print(next_question)\n", " response = input()\n", " res = nlu(response)\n", " dst.save_answer(res['slots'])\n", " next_question = dst.get_next_question()" ] }, { "cell_type": "code", "execution_count": null, "id": "leading-gospel", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "cleared-malpractice", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "single-browser", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "sunrise-zoning", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 5 }