2024-05-05 16:54:03 +02:00
|
|
|
import os
|
|
|
|
import pandas as pd
|
|
|
|
import jsgf
|
|
|
|
|
|
|
|
# Measure how many user utterances in the tab-separated dialogue logs under
# "data" are matched by at least one rule of the JSGF grammar in "book.jsgf".
grammar = jsgf.parse_grammar_file('book.jsgf')

# Load every regular file in the data directory as a header-less TSV table.
# dtype=str together with keep_default_na=False keeps each cell as the literal
# text found in the file; with pandas' defaults, utterances such as "NA",
# "null" or "None" (and empty cells) would be converted to float NaN, and an
# all-numeric column would be parsed as numbers instead of strings.
data_files = []
for filename in os.listdir("data"):
    f = os.path.join("data", filename)
    if os.path.isfile(f):
        data_files.append(
            pd.read_csv(
                f,
                sep='\t',
                header=None,
                dtype=str,
                keep_default_na=False,
            )
        )

recognized = 0
unrecognized = 0

for df in data_files:
    # Only two layouts are understood: (agent, message) and
    # (agent, message, act). Files with any other column count are skipped.
    column_count = len(df.columns)
    if column_count == 3:
        df.columns = ["agent", "message", "act"]
    elif column_count == 2:
        df.columns = ["agent", "message"]
    else:
        continue

    # Keep only the turns spoken by the user. Rows shorter than the first row
    # are still padded with NaN by the parser, so treat a missing message as
    # an empty utterance rather than handing a float to the grammar.
    # NOTE(review): find_matching_rules presumably expects a str - confirm.
    user_speeches = df.loc[df["agent"] == "user", "message"].fillna("")

    # An utterance counts as recognized when the grammar returns a non-empty
    # collection of matching rules for it.
    true_count = sum(
        bool(grammar.find_matching_rules(speech)) for speech in user_speeches
    )
    recognized += true_count
    unrecognized += len(user_speeches) - true_count

print(f"Recognized user utterances: {recognized}")
print(f"Unrecognized user utterances: {unrecognized}")

# Guard the ratio: with no user utterances at all (empty directory, only
# skipped files, or no "user" rows) the denominator would be zero.
total = recognized + unrecognized
if total:
    print(f"Accuracy: {recognized/total}")
else:
    print("Accuracy: n/a (no user utterances found)")
|