# Systemy_dialogowe/system_mockup/analizator_jezyka_naturalnego.py
# (Web-viewer metadata: 62 lines, 1.6 KiB, Python.)

2022-05-25 12:21:06 +02:00
import jsgf
from os import listdir
from os.path import isfile, join
2022-04-19 21:25:16 +02:00
2022-05-25 12:21:06 +02:00
# Directory with the JSGF grammar files. The path is relative, so it is
# resolved against the current working directory of the running process.
mypath = "../semantic_parser/gramatics/"

# os.listdir() returns entries in arbitrary order. nlu() returns the act of
# the FIRST grammar that matches, so the list is sorted to make the matching
# order (and therefore the NLU result) deterministic across machines.
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))

# One parsed grammar per file, in the same order as `onlyfiles`.
grammars = [
    jsgf.parse_grammar_file(join(mypath, grammarFile))
    for grammarFile in onlyfiles
]
def get_dialog_act(rule):
    """Build a dialog-act frame from a matched grammar rule.

    Args:
        rule: A matched rule object exposing ``expansion`` and
            ``grammar.name``.

    Returns:
        A dict with two keys: ``'act'`` holds the name of the grammar the
        rule belongs to, and ``'slots'`` holds the list of
        ``(tag, current_match)`` pairs gathered by ``get_slots`` from the
        rule's expansion tree.
    """
    collected = []
    get_slots(rule.expansion, collected)
    return dict(act=rule.grammar.name, slots=collected)
def get_slots(expansion, slots):
    """Collect ``(tag, current_match)`` pairs from an expansion tree.

    The tree is walked depth-first, left to right. A node with a non-empty
    tag contributes one pair and is not descended into. An untagged node
    with children is expanded into its children. An untagged, childless
    node that is a ``jsgf.NamedRuleRef`` is followed into the expansion of
    the rule it references.

    Args:
        expansion: Root node of the expansion tree to inspect.
        slots: List that the found pairs are appended to (mutated in place).

    Returns:
        None; results are delivered through ``slots``.
    """
    pending = [expansion]
    while pending:
        node = pending.pop()

        if node.tag != '':
            slots.append((node.tag, node.current_match))
            continue

        children = node.children
        if children:
            # Push in reverse so the leftmost child is popped first,
            # which keeps the left-to-right visiting order.
            pending.extend(reversed(children))
        elif isinstance(node, jsgf.NamedRuleRef):
            pending.append(node.referenced_rule.expansion)
def nlu(utterance):
    """Turn an utterance into a dialog-act frame using the loaded grammars.

    The grammars in the module-level ``grammars`` list are tried in order;
    the first one that reports any matching rule decides the result.

    Args:
        utterance: The (already preprocessed) user utterance.

    Returns:
        The frame built by ``get_dialog_act`` from the first matching rule
        of the first matching grammar, or ``{'act': 'null', 'slots': []}``
        when no grammar matches.
    """
    for candidate in grammars:
        rules = candidate.find_matching_rules(utterance)
        if rules:
            return get_dialog_act(rules[0])

    return {'act': 'null', 'slots': []}
2022-04-19 21:25:16 +02:00
def analizator_jezyka_naturalnego(text):
    """Run the natural-language analyzer on raw user text.

    The text is first normalized with ``text_preprocess`` and the result is
    passed to ``nlu``.

    Args:
        text: Raw user input.

    Returns:
        The frame returned by ``nlu`` for the normalized text.
    """
    return nlu(text_preprocess(text))
# Single-pass character map: lowercase Polish diacritics -> ASCII letters,
# newline and tab -> space.
_TEXT_PREPROCESS_MAP = str.maketrans({
    "ą": "a",
    "ć": "c",
    "ę": "e",
    "ł": "l",
    "ń": "n",
    "ó": "o",
    "ś": "s",
    "ź": "z",
    "ż": "z",
    "\n": " ",
    "\t": " ",
})


def text_preprocess(text):
    """Normalize user text before grammar matching.

    Steps, in order:
      1. lowercase the text,
      2. replace Polish diacritics with their ASCII base letters and turn
         newlines and tabs into spaces,
      3. collapse every run of consecutive spaces into a single space.

    Leading and trailing spaces are kept (only collapsed to one).

    Args:
        text: Raw input string.

    Returns:
        The normalized string.
    """
    text = text.lower().translate(_TEXT_PREPROCESS_MAP)

    # The original final step replaced a single space with a single space,
    # which did nothing; repeat the double->single replacement until no run
    # of spaces is left (each pass roughly halves the run length).
    while "  " in text:
        text = text.replace("  ", " ")

    return text