Update parsing semantyczny
This commit is contained in:
parent
452a728a6e
commit
61cbd2d005
352
data/parsing_semantyczny.ipynb
Normal file
352
data/parsing_semantyczny.ipynb
Normal file
@ -0,0 +1,352 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "169e11e8-19c7-47ae-b8af-817e2e474ee7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Overwriting book.jsgf\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile book.jsgf\n",
|
||||
"#JSGF V1.0 UTF-8 pl;\n",
|
||||
"\n",
|
||||
"grammar book;\n",
|
||||
"\n",
|
||||
"public <rezerwuj> = chcialbym zarezerwowac stolik <dzien_rezerwacji> <godzina_rezerwacji> <liczba_osob> ;\n",
|
||||
"\n",
|
||||
"<dzien_rezerwacji> = na <dzien> {day};\n",
|
||||
"\n",
|
||||
"<dzien> = dzisiaj | jutro | poniedzialek | wtorek | srode | czwartek | piatek | sobote | niedziele;\n",
|
||||
"\n",
|
||||
"<godzina_rezerwacji> = na [godzine] <godzina_z_minutami> {hour};\n",
|
||||
"\n",
|
||||
"<godzina_z_minutami> = <godzina> [<minuty>];\n",
|
||||
"\n",
|
||||
"<godzina> = dziewiata | dziesiata | jedenasta | dwunasta;\n",
|
||||
"\n",
|
||||
"<minuty> = pietnascie | trzydziesci;\n",
|
||||
"\n",
|
||||
"<liczba_osob> = (na | dla) <liczba> {size} osob;\n",
|
||||
"\n",
|
||||
"<liczba> = dwie | dwoch | trzy | trzech | cztery | czterech | piec | pieciu;"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "88cc9879-a59c-4c90-adb6-1022c42913e6",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Overwriting order.jsgf\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile order.jsgf\n",
|
||||
"#JSGF V1.0 UTF-8 pl;\n",
|
||||
"\n",
|
||||
"grammar order;\n",
|
||||
"\n",
|
||||
"public <zamow> = (chcialbym | chcialabym) zamowic <danie> <stolik>;\n",
|
||||
"\n",
|
||||
"<danie> = <potrawa> {dish};\n",
|
||||
"\n",
|
||||
"<potrawa> = pizza peperoni | spaghetti carbonara | burger szefa;\n",
|
||||
"\n",
|
||||
"<stolik> = (do stolika | na stolik ) <numer_stolika> {stolik};\n",
|
||||
"\n",
|
||||
"<numer_stolika> = 100 | 101 | 102 | 103 | 104 | 105;"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "c5ffadfe-ea18-45ab-a9f9-b2cfd3c2fee7",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Overwriting recommend.jsgf\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%writefile recommend.jsgf\n",
|
||||
"#JSGF V1.0 UTF-8 pl;\n",
|
||||
"\n",
|
||||
"grammar recommend;\n",
|
||||
"\n",
|
||||
"public <polec_ogolny> = (jakie|jaki|jaka|jakiego) <jedzenie_napoj> polecasz;\n",
|
||||
"\n",
|
||||
"<jedzenie_napoj> = <jedzenie_napoj_opcje> {food_or_drink};\n",
|
||||
"\n",
|
||||
"<jedzenie_napoj_opcje> = jedzenia | picia | napoj | danie | dania;\n",
|
||||
"\n",
|
||||
"public <polec_danie> = ((jakie|jaki|jaka|jakiego) <danie>) *;\n",
|
||||
"\n",
|
||||
"public <polec_napoj> = ((jakie|jaki|jaka|jakiego) <napoj>) *;\n",
|
||||
"\n",
|
||||
"<danie> = <dania> {dish};\n",
|
||||
"\n",
|
||||
"<dania> = makarony | makaron | pizze | pizza | burgery | burger | salatki | salatka | deser | desery;\n",
|
||||
"\n",
|
||||
"<napoj> = <napoje> {drink};\n",
|
||||
"\n",
|
||||
"<napoje> = piwo | piwa | wino | wina | sok | soki | smoothie | kawa | kawe | kawy | herbate | herbata | herbaty;"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "57fb9886-0232-4dd9-b385-7cae1b068f6e",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Grammar(version=1.0, charset=UTF-8, language=pl, name=order)"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import jsgf\n",
|
||||
"\n",
|
||||
"book_grammar = jsgf.parse_grammar_file('book.jsgf')\n",
|
||||
"order_grammar = jsgf.parse_grammar_file('order.jsgf')\n",
|
||||
"recommend_grammar = jsgf.parse_grammar_file('recommend.jsgf')\n",
|
||||
"order_grammar"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "8f5acde6-4f3c-425d-a202-f361b219922d",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Rule(name='rezerwuj', visible=True, expansion=Sequence(Literal('chcialbym zarezerwowac stolik'), NamedRuleRef('dzien_rezerwacji'), NamedRuleRef('godzina_rezerwacji'), NamedRuleRef('liczba_osob')))]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"utterance = 'chcialbym zarezerwowac stolik na jutro na godzine dwunasta trzydziesci na piec osob'\n",
|
||||
"matched = book_grammar.find_matching_rules(utterance)\n",
|
||||
"if not matched:\n",
|
||||
" matched = order_grammar.find_matching_rules(utterance)\n",
|
||||
"if not matched:\n",
|
||||
" matched = recommend_grammar.find_matching_rules(utterance)\n",
|
||||
"matched"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "6cb7d2ba-d8ee-403a-bbf3-d7ea3e0475ba",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Rule(name='zamow', visible=True, expansion=Sequence(ParsedAlternativeSet(Literal('chcialbym'), Literal('chcialabym')), Literal('zamowic'), NamedRuleRef('danie'), NamedRuleRef('stolik')))]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"utterance = 'chcialbym zamowic burger szefa do stolika 104'\n",
|
||||
"matched = book_grammar.find_matching_rules(utterance)\n",
|
||||
"if not matched:\n",
|
||||
" matched = order_grammar.find_matching_rules(utterance)\n",
|
||||
"if not matched:\n",
|
||||
" matched = recommend_grammar.find_matching_rules(utterance)\n",
|
||||
"matched"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "371ee857-418d-4546-8e29-88b6c8706659",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Rule(name='polec_danie', visible=True, expansion=KleeneStar(RequiredGrouping(Sequence(RequiredGrouping(ParsedAlternativeSet(Literal('jakie'), Literal('jaki'), Literal('jaka'), Literal('jakiego'))), NamedRuleRef('danie'))))),\n",
|
||||
" Rule(name='polec_napoj', visible=True, expansion=KleeneStar(RequiredGrouping(Sequence(RequiredGrouping(ParsedAlternativeSet(Literal('jakie'), Literal('jaki'), Literal('jaka'), Literal('jakiego'))), NamedRuleRef('napoj')))))]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"utterance = 'jakie makarony polecasz'\n",
|
||||
"matched = book_grammar.find_matching_rules(utterance)\n",
|
||||
"if not matched:\n",
|
||||
" matched = order_grammar.find_matching_rules(utterance)\n",
|
||||
"if not matched:\n",
|
||||
" matched = recommend_grammar.find_matching_rules(utterance)\n",
|
||||
"matched"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c914cc3d-b297-4e05-917c-6a2f09c0f8a9",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'act': 'recommend', 'slots': [('dish', 'makarony')]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def get_dialog_act(rule):\n",
|
||||
" slots = []\n",
|
||||
" get_slots(rule.expansion, slots)\n",
|
||||
" return {'act': rule.grammar.name, 'slots': slots}\n",
|
||||
"\n",
|
||||
"def get_slots(expansion, slots):\n",
|
||||
" if expansion.tag != '':\n",
|
||||
" slots.append((expansion.tag, expansion.current_match))\n",
|
||||
" return\n",
|
||||
"\n",
|
||||
" for child in expansion.children:\n",
|
||||
" get_slots(child, slots)\n",
|
||||
"\n",
|
||||
" if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):\n",
|
||||
" get_slots(expansion.referenced_rule.expansion, slots)\n",
|
||||
"\n",
|
||||
"get_dialog_act(matched[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "91eaed10-e8c3-4429-a232-d317b84ec38f",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'act': 'book',\n",
|
||||
" 'slots': [('day', 'jutro'), ('hour', 'dziesiata'), ('size', 'trzech')]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def nlu(utterance):\n",
|
||||
" matched = book_grammar.find_matching_rules(utterance)\n",
|
||||
" if not matched:\n",
|
||||
" matched = order_grammar.find_matching_rules(utterance)\n",
|
||||
" if not matched:\n",
|
||||
" matched = recommend_grammar.find_matching_rules(utterance)\n",
|
||||
"\n",
|
||||
" if matched:\n",
|
||||
" return get_dialog_act(matched[0])\n",
|
||||
" else:\n",
|
||||
" return {'act': 'null', 'slots': []}\n",
|
||||
"\n",
|
||||
"nlu('chcialbym zarezerwowac stolik na jutro na godzine dziesiata dla trzech osob')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "d85e8543-be59-4c5f-b9f9-27631a899c2a",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'act': 'recommend', 'slots': [('dish', 'makarony')]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"nlu('jakie makarony polecasz')"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue
Block a user