2022-05-25 12:21:06 +02:00
|
|
|
import jsgf
|
|
|
|
from os import listdir
|
|
|
|
from os.path import isfile, join
|
2022-04-19 21:25:16 +02:00
|
|
|
|
2022-05-25 12:21:06 +02:00
|
|
|
# Directory holding the JSGF grammar files; resolved relative to the current
# working directory of the process.
mypath = "../semantic_parser/gramatics/"

# Regular files found directly in the grammar directory. listdir() yields
# entries in arbitrary order, so the names are sorted: nlu() stops at the first
# grammar that matches, and a stable order keeps its answer reproducible.
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))

# One parsed grammar per file, in the same order as `onlyfiles`.
grammars = [jsgf.parse_grammar_file(join(mypath, f)) for f in onlyfiles]
|
|
|
|
|
|
|
|
def get_dialog_act(rule):
    """Build the dialog-act frame for a matched grammar rule.

    The act name is taken from the name of the grammar that owns the rule;
    the slots are whatever get_slots() collects from the rule's expansion.

    Returns a dict with the keys 'act' and 'slots'.
    """
    collected = []
    get_slots(rule.expansion, collected)
    act_name = rule.grammar.name
    return {'act': act_name, 'slots': collected}
|
|
|
|
|
|
|
|
def get_slots(expansion, slots):
    """Collect (tag, current_match) pairs from an expansion tree into `slots`.

    `slots` is extended in place; nothing is returned.

    A tagged expansion contributes one pair and its subtree is not searched
    any further. An untagged expansion is searched through its children; when
    it has no children and is a named rule reference, the search continues in
    the expansion of the rule it refers to.
    """
    tag = expansion.tag
    if tag != '':
        slots.append((tag, expansion.current_match))
        return

    children = expansion.children
    for node in children:
        get_slots(node, slots)

    # Only a childless rule reference is followed into the referenced rule.
    if children or not isinstance(expansion, jsgf.NamedRuleRef):
        return
    get_slots(expansion.referenced_rule.expansion, slots)
|
|
|
|
|
|
|
|
def nlu(utterance):
    """Turn an utterance into a dialog-act frame using the loaded grammars.

    The grammars are tried in order and the first rule of the first grammar
    that reports any matching rules decides the result. When no grammar
    matches, the frame has the act 'null' and no slots.
    """
    for candidate in grammars:
        rules = candidate.find_matching_rules(utterance)
        if rules:
            return get_dialog_act(rules[0])
    return {'act': 'null', 'slots': []}
|
2022-04-19 21:25:16 +02:00
|
|
|
|
|
|
|
def analizator_jezyka_naturalnego(text):
    """Natural-language analyser: normalise `text`, then run NLU on it.

    Returns the frame produced by nlu() for the preprocessed text.
    """
    return nlu(text_preprocess(text))
|
|
|
|
|
|
|
|
# Maps lowercase Polish diacritics to their ASCII base letters and turns
# newlines and tabs into plain spaces, so one translate() pass replaces the
# whole chain of replace() calls.
_TEXT_NORMALIZATION_TABLE = str.maketrans({
    "ą": "a",
    "ć": "c",
    "ę": "e",
    "ł": "l",
    "ń": "n",
    "ó": "o",
    "ś": "s",
    "ź": "z",
    "ż": "z",
    "\n": " ",
    "\t": " ",
})


def text_preprocess(text):
    """Normalise raw user text before it is matched against the grammars.

    The text is lowercased, Polish diacritics are replaced by their ASCII
    base letters, newlines and tabs become spaces, and every run of
    consecutive spaces is collapsed to a single space. Leading and trailing
    single spaces are kept.

    Args:
        text: The raw input string.

    Returns:
        The normalised string.
    """
    # Lowercase first so uppercase diacritics are covered by the table too.
    text = text.lower().translate(_TEXT_NORMALIZATION_TABLE)

    # Built by repetition so the two-space pattern survives tools that
    # squeeze whitespace inside source text.
    double_space = " " * 2
    # A single replace() pass leaves pairs behind in longer runs, so repeat
    # until none remain.
    while double_space in text:
        text = text.replace(double_space, " ")
    return text
|