import re
import jsgf

# ASCII punctuation stripped from user input before grammar matching.
_PUNCTUATION = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
# Translation table that deletes every punctuation character in one pass.
_PUNCTUATION_TABLE = str.maketrans('', '', _PUNCTUATION)


class Nlu:
    """Rule-based natural-language understanding backed by a JSGF grammar.

    The grammar is parsed once at construction time; ``tokenize`` then maps
    an utterance to a dialog act with its tagged slots.
    """

    def __init__(self, grammar_path='rules.jsgf'):
        """Load and parse the JSGF grammar.

        Args:
            grammar_path: Path of the grammar file. Defaults to
                ``'rules.jsgf'``, resolved against the current working
                directory.

        Raises:
            OSError: If the grammar file cannot be opened.
        """
        with open(grammar_path, 'r', encoding='utf-8') as f:
            grammar_source = f.read()
        self.rules_grammar = jsgf.parse_grammar_string(grammar_source)
        # Static description of known dialog acts; not read by any method
        # visible in this file.
        self.acts = {
            "request": {
                'triggers': ['jak', 'kiedy'],
                'parameters': ['imie'],
            }
        }

    def get_slots(self, expansion, slots):
        """Append ``(tag, current_match)`` pairs found under *expansion*.

        Walks the expansion tree depth-first. A tagged expansion contributes
        one pair and is not descended into. A childless ``NamedRuleRef`` is
        followed into the expansion of the rule it references.

        Args:
            expansion: Root of the expansion (sub)tree to inspect.
            slots: List that is extended in place with the found pairs.
        """
        if expansion.tag != '':
            slots.append((expansion.tag, expansion.current_match))
            return

        for child in expansion.children:
            self.get_slots(child, slots)

        # A rule reference has no children of its own; its content lives in
        # the referenced rule's expansion.
        if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):
            self.get_slots(expansion.referenced_rule.expansion, slots)

    def get_dialog_act(self, rule):
        """Build the dialog-act dictionary for a matched *rule*.

        Returns:
            ``{'act': <name of the rule's grammar>, 'slots': [(tag, match), ...]}``.
        """
        slots = []
        self.get_slots(rule.expansion, slots)
        return {'act': rule.grammar.name, 'slots': slots}

    def tokenize(self, string):
        """Convert a raw utterance into a dialog act.

        The utterance is normalised with ``get_str_cleaned`` and matched
        against the grammar; only the first matching rule is used.

        Args:
            string: Raw user utterance.

        Returns:
            The dialog act of the first matching rule, or
            ``{'act': 'null', 'slots': []}`` when no rule matches.
        """
        clean_string = self.get_str_cleaned(string)
        matched = self.rules_grammar.find_matching_rules(clean_string)
        if matched:
            return self.get_dialog_act(matched[0])
        return {'act': 'null', 'slots': []}

    def get_str_cleaned(self, str_dirty):
        """Normalise an utterance for grammar matching.

        Lower-cases the text, deletes ASCII punctuation, collapses runs of
        spaces into a single space and trims leading/trailing whitespace.

        Punctuation is removed *before* spaces are collapsed so that a
        free-standing punctuation mark (e.g. ``"bilet , do"``) does not leave
        a double space behind.

        Args:
            str_dirty: Raw text.

        Returns:
            The normalised text.
        """
        lowered = str_dirty.lower()
        without_punctuation = lowered.translate(_PUNCTUATION_TABLE)
        return re.sub(' +', ' ', without_punctuation).strip()


# TODO: Refactor return (act, param)
# NOTE: the statements below run at import time (they load the grammar from
# the working directory and print sample results).
nlu = Nlu()
print(nlu.tokenize('chciałbym kupić bilet do Krakow'))
print(nlu.tokenize('chciałbym kupić bilet z Poznan'))
print(nlu.tokenize('w piątek'))
print(nlu.tokenize('4 bilety'))
print(nlu.tokenize('2 bilety z ulgą studencką'))
print(nlu.tokenize('miejsce pod oknem'))
print(nlu.tokenize('druga klasa'))