systemy_dialogowe/data/parsing_semantyczny.ipynb

260 lines
6.4 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "169e11e8-19c7-47ae-b8af-817e2e474ee7",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting book.jsgf\n"
]
}
],
"source": [
"%%writefile book.jsgf\n",
"#JSGF V1.0 UTF-8 pl;\n",
"\n",
"grammar book;\n",
"\n",
"public <rezerwuj> = chcialbym zarezerwowac stolik <dzien_rezerwacji> <godzina_rezerwacji> <liczba_osob> ;\n",
"\n",
"<dzien_rezerwacji> = na <dzien> {day};\n",
"\n",
"<dzien> = dzisiaj | jutro | poniedzialek | wtorek | srode | czwartek | piatek | sobote | niedziele;\n",
"\n",
"<godzina_rezerwacji> = na [godzine] <godzina_z_minutami> {hour};\n",
"\n",
"<godzina_z_minutami> = <godzina> [<minuty>];\n",
"\n",
"<godzina> = dziewiata | dziesiata | jedenasta | dwunasta;\n",
"\n",
"<minuty> = pietnascie | trzydziesci;\n",
"\n",
"<liczba_osob> = (na | dla) <liczba> {size} osob;\n",
"\n",
"<liczba> = dwie | dwoch | trzy | trzech | cztery | czterech | piec | pieciu;"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "88cc9879-a59c-4c90-adb6-1022c42913e6",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Overwriting order.jsgf\n"
]
}
],
"source": [
"%%writefile order.jsgf\n",
"#JSGF V1.0 UTF-8 pl;\n",
"\n",
"grammar order;\n",
"\n",
"public <zamow> = (chcialbym | chcialabym) zamowic <danie> <stolik>;\n",
"\n",
"<danie> = <potrawa> {dish};\n",
"\n",
"<potrawa> = pizza peperoni | spaghetti carbonara | burger szefa;\n",
"\n",
"<stolik> = (do stolika | na stolik ) <numer_stolika> {stolik};\n",
"\n",
"<numer_stolika> = 100 | 101 | 102 | 103 | 104 | 105;"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "57fb9886-0232-4dd9-b385-7cae1b068f6e",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"Grammar(version=1.0, charset=UTF-8, language=pl, name=order)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import jsgf\n",
"\n",
"book_grammar = jsgf.parse_grammar_file('book.jsgf')\n",
"order_grammar = jsgf.parse_grammar_file('order.jsgf')\n",
"order_grammar"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8f5acde6-4f3c-425d-a202-f361b219922d",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Rule(name='rezerwuj', visible=True, expansion=Sequence(Literal('chcialbym zarezerwowac stolik'), NamedRuleRef('dzien_rezerwacji'), NamedRuleRef('godzina_rezerwacji'), NamedRuleRef('liczba_osob')))]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"utterance = 'chcialbym zarezerwowac stolik na jutro na godzine dwunasta trzydziesci na piec osob'\n",
"matched = book_grammar.find_matching_rules(utterance)\n",
"if not matched:\n",
" matched = order_grammar.find_matching_rules(utterance)\n",
"matched"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6cb7d2ba-d8ee-403a-bbf3-d7ea3e0475ba",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[Rule(name='zamow', visible=True, expansion=Sequence(ParsedAlternativeSet(Literal('chcialbym'), Literal('chcialabym')), Literal('zamowic'), NamedRuleRef('danie'), NamedRuleRef('stolik')))]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"utterance = 'chcialbym zamowic burger szefa do stolika 104'\n",
"matched = book_grammar.find_matching_rules(utterance)\n",
"if not matched:\n",
" matched = order_grammar.find_matching_rules(utterance)\n",
"matched"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c914cc3d-b297-4e05-917c-6a2f09c0f8a9",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'act': 'order', 'slots': [('dish', 'burger szefa'), ('stolik', '104')]}"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_dialog_act(rule):\n",
" slots = []\n",
" get_slots(rule.expansion, slots)\n",
" return {'act': rule.grammar.name, 'slots': slots}\n",
"\n",
"def get_slots(expansion, slots):\n",
" if expansion.tag != '':\n",
" slots.append((expansion.tag, expansion.current_match))\n",
" return\n",
"\n",
" for child in expansion.children:\n",
" get_slots(child, slots)\n",
"\n",
" if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):\n",
" get_slots(expansion.referenced_rule.expansion, slots)\n",
"\n",
"get_dialog_act(matched[0])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "91eaed10-e8c3-4429-a232-d317b84ec38f",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"{'act': 'book',\n",
" 'slots': [('day', 'jutro'), ('hour', 'dziesiata'), ('size', 'trzech')]}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def nlu(utterance):\n",
" matched = book_grammar.find_matching_rules(utterance)\n",
"\n",
" if matched:\n",
" return get_dialog_act(matched[0])\n",
" else:\n",
" return {'act': 'null', 'slots': []}\n",
"\n",
"nlu('chcialbym zarezerwowac stolik na jutro na godzine dziesiata dla trzech osob')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d85e8543-be59-4c5f-b9f9-27631a899c2a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}