JSGF evaluation + small NLU optimization

This commit is contained in:
Adrian Klessa 2024-05-05 16:54:03 +02:00
parent 9dffcd9369
commit 5c18bc5b9a
2 changed files with 36 additions and 5 deletions

View File

@ -26,7 +26,7 @@ class Model():
class NLU():
def __init__(self):
pass
self.book_grammar = jsgf.parse_grammar_file('book.jsgf')
def get_dialog_act(self, rule):
slots = []
@ -45,10 +45,7 @@ class NLU():
self.get_slots(expansion.referenced_rule.expansion, slots)
def __call__(self, prompt) -> Any:
book_grammar = jsgf.parse_grammar_file('book.jsgf')
matched = book_grammar.find_matching_rules(prompt)
matched = self.book_grammar.find_matching_rules(prompt)
if matched:
return self.get_dialog_act(matched[0])
else:

View File

@ -0,0 +1,34 @@
import os
import pandas as pd
import jsgf
# Evaluation script: measure how many user utterances found in the
# tab-separated files under "data" are matched by at least one rule of the
# JSGF grammar stored in 'book.jsgf'.
grammar = jsgf.parse_grammar_file('book.jsgf')

# Load every regular file in "data" as a headerless, tab-separated table.
data_files = []
for filename in os.listdir("data"):
    f = os.path.join("data", filename)
    if os.path.isfile(f):
        data_files.append(pd.read_csv(f, sep='\t', header=None))

recognized = 0
unrecognized = 0
for df in data_files:
    # Only the 3-column and 2-column layouts are handled; tables with any
    # other column count are skipped.
    if len(df.columns) == 3:
        df.columns = ["agent", "message", "act"]
    elif len(df.columns) == 2:
        df.columns = ["agent", "message"]
    else:
        continue

    # Keep only the rows whose agent is "user" and evaluate their messages.
    user_speech_rows = df[df['agent'] == "user"]
    user_speeches = user_speech_rows["message"]

    # An utterance counts as recognized when the grammar returns a non-empty
    # list of matching rules for it.
    parsed = user_speeches.apply(lambda x: bool(grammar.find_matching_rules(x)))
    true_count = int(parsed.sum())
    false_count = len(parsed) - true_count
    recognized += true_count
    unrecognized += false_count

print(f"Recognized user utterances: {recognized}")
print(f"Unrecognized user utterances: {unrecognized}")

# Guard the ratio: with no user utterances at all (empty "data" directory,
# only unsupported layouts, or no "user" rows) the division would raise
# ZeroDivisionError.
total = recognized + unrecognized
if total:
    print(f"Accuracy: {recognized/total}")
else:
    print("Accuracy: n/a (no user utterances found)")