diff --git a/data/parsing_semantyczny.ipynb b/data/parsing_semantyczny.ipynb new file mode 100644 index 0000000..3d6d370 --- /dev/null +++ b/data/parsing_semantyczny.ipynb @@ -0,0 +1,352 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "169e11e8-19c7-47ae-b8af-817e2e474ee7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting book.jsgf\n" + ] + } + ], + "source": [ + "%%writefile book.jsgf\n", + "#JSGF V1.0 UTF-8 pl;\n", + "\n", + "grammar book;\n", + "\n", + "public = chcialbym zarezerwowac stolik ;\n", + "\n", + " = na {day};\n", + "\n", + " = dzisiaj | jutro | poniedzialek | wtorek | srode | czwartek | piatek | sobote | niedziele;\n", + "\n", + " = na [godzine] {hour};\n", + "\n", + " = [];\n", + "\n", + " = dziewiata | dziesiata | jedenasta | dwunasta;\n", + "\n", + " = pietnascie | trzydziesci;\n", + "\n", + " = (na | dla) {size} osob;\n", + "\n", + " = dwie | dwoch | trzy | trzech | cztery | czterech | piec | pieciu;" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "88cc9879-a59c-4c90-adb6-1022c42913e6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting order.jsgf\n" + ] + } + ], + "source": [ + "%%writefile order.jsgf\n", + "#JSGF V1.0 UTF-8 pl;\n", + "\n", + "grammar order;\n", + "\n", + "public = (chcialbym | chcialabym) zamowic ;\n", + "\n", + " = {dish};\n", + "\n", + " = pizza peperoni | spaghetti carbonara | burger szefa;\n", + "\n", + " = (do stolika | na stolik ) {stolik};\n", + "\n", + " = 100 | 101 | 102 | 103 | 104 | 105;" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c5ffadfe-ea18-45ab-a9f9-b2cfd3c2fee7", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting recommend.jsgf\n" + ] + } + ], + "source": [ + "%%writefile recommend.jsgf\n", + "#JSGF V1.0 UTF-8 pl;\n", + "\n", + "grammar recommend;\n", + "\n", + "public = (jakie|jaki|jaka|jakiego) polecasz;\n", + "\n", + " = {food_or_drink};\n", + "\n", + " = jedzenia | picia | napoj | danie | dania;\n", + "\n", + "public = ((jakie|jaki|jaka|jakiego) ) *;\n", + "\n", + "public = ((jakie|jaki|jaka|jakiego) ) *;\n", + "\n", + " = {dish};\n", + "\n", + " = makarony | makaron | pizze | pizza | burgery | burger | salatki | salatka | deser | desery;\n", + "\n", + " = {drink};\n", + "\n", + " = piwo | piwa | wino | wina | sok | soki | smoothie | kawe | kawy | kawe | herbate | herbata | herbaty;" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "57fb9886-0232-4dd9-b385-7cae1b068f6e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Grammar(version=1.0, charset=UTF-8, language=pl, name=order)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import jsgf\n", + "\n", + "book_grammar = jsgf.parse_grammar_file('book.jsgf')\n", + "order_grammar = jsgf.parse_grammar_file('order.jsgf')\n", + "recommend_grammar = jsgf.parse_grammar_file('recommend.jsgf')\n", + "order_grammar" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8f5acde6-4f3c-425d-a202-f361b219922d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Rule(name='rezerwuj', visible=True, expansion=Sequence(Literal('chcialbym zarezerwowac stolik'), NamedRuleRef('dzien_rezerwacji'), NamedRuleRef('godzina_rezerwacji'), NamedRuleRef('liczba_osob')))]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "utterance = 'chcialbym zarezerwowac stolik na jutro na godzine dwunasta trzydziesci na piec osob'\n", + "matched = book_grammar.find_matching_rules(utterance)\n", + "if not matched:\n", + " matched = order_grammar.find_matching_rules(utterance)\n", + "if not matched:\n", + " matched = recommend_grammar.find_matching_rules(utterance)\n", + "matched" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6cb7d2ba-d8ee-403a-bbf3-d7ea3e0475ba", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Rule(name='zamow', visible=True, expansion=Sequence(ParsedAlternativeSet(Literal('chcialbym'), Literal('chcialabym')), Literal('zamowic'), NamedRuleRef('danie'), NamedRuleRef('stolik')))]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "utterance = 'chcialbym zamowic burger szefa do stolika 104'\n", + "matched = book_grammar.find_matching_rules(utterance)\n", + "if not matched:\n", + " matched = order_grammar.find_matching_rules(utterance)\n", + "if not matched:\n", + " matched = recommend_grammar.find_matching_rules(utterance)\n", + "matched" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "371ee857-418d-4546-8e29-88b6c8706659", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Rule(name='polec_danie', visible=True, expansion=KleeneStar(RequiredGrouping(Sequence(RequiredGrouping(ParsedAlternativeSet(Literal('jakie'), Literal('jaki'), Literal('jaka'), Literal('jakiego'))), NamedRuleRef('danie'))))),\n", + " Rule(name='polec_napoj', visible=True, expansion=KleeneStar(RequiredGrouping(Sequence(RequiredGrouping(ParsedAlternativeSet(Literal('jakie'), Literal('jaki'), Literal('jaka'), Literal('jakiego'))), NamedRuleRef('napoj')))))]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "utterance = 'jakie makarony polecasz'\n", + "matched = book_grammar.find_matching_rules(utterance)\n", + "if not matched:\n", + " matched = order_grammar.find_matching_rules(utterance)\n", + "if not matched:\n", + " matched = recommend_grammar.find_matching_rules(utterance)\n", + "matched" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "c914cc3d-b297-4e05-917c-6a2f09c0f8a9", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'act': 'recommend', 'slots': [('dish', 'makarony')]}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def get_dialog_act(rule):\n", + " slots = []\n", + " get_slots(rule.expansion, slots)\n", + " return {'act': rule.grammar.name, 'slots': slots}\n", + "\n", + "def get_slots(expansion, slots):\n", + " if expansion.tag != '':\n", + " slots.append((expansion.tag, expansion.current_match))\n", + " return\n", + "\n", + " for child in expansion.children:\n", + " get_slots(child, slots)\n", + "\n", + " if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):\n", + " get_slots(expansion.referenced_rule.expansion, slots)\n", + "\n", + "get_dialog_act(matched[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "91eaed10-e8c3-4429-a232-d317b84ec38f", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'act': 'book',\n", + " 'slots': [('day', 'jutro'), ('hour', 'dziesiata'), ('size', 'trzech')]}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def nlu(utterance):\n", + " matched = book_grammar.find_matching_rules(utterance)\n", + " if not matched:\n", + " matched = order_grammar.find_matching_rules(utterance)\n", + " if not matched:\n", + " matched = recommend_grammar.find_matching_rules(utterance)\n", + "\n", + " if matched:\n", + " return get_dialog_act(matched[0])\n", + " else:\n", + " return {'act': 'null', 'slots': []}\n", + "\n", + "nlu('chcialbym zarezerwowac stolik na jutro na godzine dziesiata dla trzech osob')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d85e8543-be59-4c5f-b9f9-27631a899c2a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'act': 'recommend', 'slots': [('dish', 'makarony')]}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nlu('jakie makarony polecasz')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}