62 lines
1.6 KiB
Python
62 lines
1.6 KiB
Python
import re
|
|
import jsgf
|
|
|
|
class Nlu:
    """Rule-based utterance parser backed by a JSGF grammar.

    The grammar is loaded once at construction time. ``tokenize`` normalizes
    an utterance, asks the grammar for matching rules and turns the first
    match into a dialog-act dictionary of the form
    ``{'act': <grammar name>, 'slots': [(tag, matched_text), ...]}``.
    """

    # Translation table deleting ASCII punctuation in a single pass.
    _PUNCTUATION_TABLE = str.maketrans(
        '', '', '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
    )
    # Runs of spaces; collapsed to one space during normalization.
    _SPACE_RUN = re.compile(' +')

    def __init__(self, grammar_path='rules.jsgf'):
        """Load the JSGF grammar and set up the dialog-act table.

        Args:
            grammar_path: Path of the UTF-8 encoded JSGF grammar file.
                Defaults to ``'rules.jsgf'`` in the current directory.
        """
        with open(grammar_path, 'r', encoding='utf-8') as f:
            self.rules_grammar = jsgf.parse_grammar_string(f.read())

        self.acts = {
            "request": {
                'triggers': ['jak', 'kiedy'],
                'parameters': ['imie'],
            },
        }

    def get_slots(self, expansion, slots):
        """Collect ``(tag, matched_text)`` pairs from an expansion tree.

        Results are appended to ``slots`` in place; nothing is returned.

        Args:
            expansion: Expansion node to inspect. It must expose ``tag``,
                ``current_match`` and ``children``.
            slots: List that receives the collected pairs.
        """
        if expansion.tag != '':
            # A tagged node is a slot itself; its subtree is not searched.
            slots.append((expansion.tag, expansion.current_match))
            return

        for child in expansion.children:
            self.get_slots(child, slots)

        # A childless rule reference hides its content behind the referenced
        # rule, so continue the search in that rule's expansion.
        if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):
            self.get_slots(expansion.referenced_rule.expansion, slots)

    def get_dialog_act(self, rule):
        """Build the dialog-act dictionary for a matched rule.

        Args:
            rule: Matched rule; ``rule.grammar.name`` becomes the act name
                and ``rule.expansion`` is searched for tagged slots.

        Returns:
            ``{'act': <grammar name>, 'slots': [(tag, matched_text), ...]}``.
        """
        slots = []
        self.get_slots(rule.expansion, slots)
        return {'act': rule.grammar.name, 'slots': slots}

    def tokenize(self, string):
        """Parse an utterance into a dialog act.

        Args:
            string: Raw user utterance.

        Returns:
            The dialog act of the first matching grammar rule, or
            ``{'act': 'null', 'slots': []}`` when no rule matches.
        """
        clean_string = self.get_str_cleaned(string)
        matched = self.rules_grammar.find_matching_rules(clean_string)

        if not matched:
            return {'act': 'null', 'slots': []}
        return self.get_dialog_act(matched[0])

    def get_str_cleaned(self, str_dirty):
        """Normalize an utterance before grammar matching.

        The text is lower-cased, ASCII punctuation is removed, runs of
        spaces are collapsed to a single space and leading/trailing spaces
        are dropped.

        Args:
            str_dirty: Raw text.

        Returns:
            The normalized text.
        """
        without_punctuation = str_dirty.lower().translate(self._PUNCTUATION_TABLE)
        # Collapse spaces only AFTER punctuation is gone: deleting a
        # space-surrounded mark (e.g. "a , b") would otherwise leave a
        # double space behind.
        return self._SPACE_RUN.sub(' ', without_punctuation).strip(' ')
|
|
|
|
# Demo run: build the parser from the default grammar file and print the
# dialog act recognized for one sample utterance.
nlu = Nlu()
sample_act = nlu.tokenize('chciałbym kupić bilet na pociąg z Poznan do Krakow')
print(sample_act)
|