diff --git a/chatbot/modules/nlu.py b/chatbot/modules/nlu.py
new file mode 100644
index 0000000..6e387ba
--- /dev/null
+++ b/chatbot/modules/nlu.py
@@ -0,0 +1,65 @@
+import copy
+from copy import deepcopy
+import json
+import os
+import jsgf
+
+
+class NLU:
+    """Rule-based natural language understanding built on JSGF grammars."""
+
+    # "-" and "/" separate words, so they become spaces; the remaining
+    # punctuation is deleted outright before matching.
+    _PUNCTUATION = str.maketrans("-/", "  ", ",.'?!:")
+
+    def __init__(self, grammars_dir="grammars"):
+        """Parse every ``.jsgf`` file found in *grammars_dir*.
+
+        Files are loaded in sorted order because ``match`` returns the first
+        grammar that matches and ``os.listdir`` guarantees no ordering.
+        """
+        self.grammars = [
+            jsgf.parse_grammar_file(os.path.join(grammars_dir, file_name))
+            for file_name in sorted(os.listdir(grammars_dir))
+            if file_name.endswith(".jsgf")
+        ]
+
+    def get_dialog_act(self, rule):
+        """Return ``{"act", "slots"}`` for a matched *rule*.
+
+        The act is the name of the grammar owning the rule; the slots are the
+        ``(tag, matched text)`` pairs collected from the rule's expansion.
+        """
+        slots = []
+        self.get_slots(rule.expansion, slots)
+        return {"act": rule.grammar.name, "slots": slots}
+
+    def get_slots(self, expansion, slots):
+        """Append ``(tag, current_match)`` pairs under *expansion* to *slots*.
+
+        A tagged expansion is recorded as a whole and not descended into.
+        Childless rule references are followed into the referenced rule.
+        """
+        if expansion.tag != "":
+            slots.append((expansion.tag, expansion.current_match))
+            return
+
+        for child in expansion.children:
+            self.get_slots(child, slots)
+
+        if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):
+            self.get_slots(expansion.referenced_rule.expansion, slots)
+
+    def match(self, utterance):
+        """Return the dialog act of the first grammar matching *utterance*.
+
+        Punctuation is normalised and the text lower-cased before matching.
+        When no grammar matches, the act is ``"null"`` with no slots.
+        """
+        normalized = utterance.translate(self._PUNCTUATION).lower()
+
+        for grammar in self.grammars:
+            matched = grammar.find_matching_rules(normalized)
+            if matched:
+                return self.get_dialog_act(matched[0])
+        return {"act": "null", "slots": []}
diff --git a/evaluate.py b/evaluate.py
new file mode 100644
index 0000000..e53292c
--- /dev/null
+++ b/evaluate.py
@@ -0,0 +1,31 @@
+import os
+import re
+import pandas as pd
+import numpy as np
+from chatbot.modules.nlu import NLU
+
+rows = 0
+hits = 0
+
+nlu = NLU()
+
+for file_name in os.listdir("data"):
+    df = pd.read_csv(f"data/{file_name}", sep="\t", names=["user", "sentence", "acts"])
+    df = df[df.user == "user"]
+    data = np.array(df)
+
+    for row in data:
+        rows += 1
+        sentence = row[1]
+        # Drop the parenthesised arguments so only the act names remain.
+        cleaned_text = re.sub(r'\([^)]*\)', '', row[2])
+        user_acts = [act.strip() for act in cleaned_text.split("&")]
+        nlu_match = nlu.match(sentence)
+        if nlu_match["act"] in user_acts:
+            hits += 1
+
+# Guard against an empty data set, which would otherwise divide by zero.
+if rows:
+    print(f"Accuracy: {(hits / rows) * 100}")
+else:
+    print("Accuracy: n/a (no user utterances found)")
diff --git a/grammars/inform.jsgf b/grammars/inform.jsgf
index 99af9db..68bc7c6 100644
--- a/grammars/inform.jsgf
+++ b/grammars/inform.jsgf
@@ -16,7 +16,7 @@ public = zł;
 public = jeden | dwie | trzy | cztery | pięć | sześć | siedem | osiem | dziewięć | dziesięć;
 
 public = ulica miasto kod pocztowy ;
-public = ul. ;
+public = ul ;
 public = ;
 public = -;
 
diff --git a/grammars/welcomemsg.jsgf b/grammars/welcomemsg.jsgf
index e4e326d..063f7b8 100644
--- a/grammars/welcomemsg.jsgf
+++ b/grammars/welcomemsg.jsgf
@@ -3,4 +3,4 @@ grammar welcomemsg;
 
 public = ;
 
- = Witamy w sklepie internetowym XYZ W czym mogę pomóc | Witaj! W czym mogę Ci dzisiaj pomóc| Witamy w sklepie internetowym XYZ W swojej ofercie mamy artykuły ogrodowe meblowe oraz kosmetyki | Witam tutaj sklep wielobranzowy w czym moge pomoc;
\ No newline at end of file
+ = Witamy w sklepie internetowym XYZ W czym mogę pomóc | Witaj W czym mogę Ci dzisiaj pomóc | Witamy w sklepie internetowym XYZ W swojej ofercie mamy artykuły ogrodowe meblowe oraz kosmetyki | Witam tutaj sklep wielobranzowy w czym moge pomoc | cześć chciałbym kupić {product};
\ No newline at end of file
diff --git a/main.py b/main.py
index 912c37d..9dc34fe 100644
--- a/main.py
+++ b/main.py
@@ -2,6 +2,7 @@ import jsgf
 
 request_grammar = jsgf.parse_grammar_file('./grammars/request.jsgf')
 
+
 def main():
     utterance = 'Czy macie w ofercie balsam do ciała'
     matched = request_grammar.find_matching_rules(utterance)