Compare commits
No commits in common. "53628482e764963fe0d0c3a8e57cdeeb8f5b8213" and "2a9a51fea8d4ac583389c5a768e9db57b5bba5b8" have entirely different histories.
53628482e7 ... 2a9a51fea8

evaluate.py (41 lines changed)
@@ -3,7 +3,6 @@ import pandas as pd
 import jsgf
 from unidecode import unidecode
 import string
-from collections import defaultdict
 
 
 def decode_prompt(prompt):
@@ -22,12 +21,6 @@ for filename in os.listdir("data"):
 
 recognized = 0
 unrecognized = 0
-true_positives = 0
-false_positives = 0
-false_negatives = 0
-
-acts_recognized = defaultdict(int)
-acts_not_recognized = defaultdict(int)
 
 for df in data_files:
     if len(df.columns) == 3:
@@ -47,41 +40,7 @@ for df in data_files:
     false_count = len(parsed) - true_count
     recognized += true_count
     unrecognized += false_count
 
-    for line, correct in zip(df.iterrows(), parsed):
-        acts_recognized[line[1]['act'].split('(')[0]] += int(correct)
-        acts_not_recognized[line[1]['act'].split('(')[0]] += int(not(correct))
-
-
 print(f"Recognized user utterances: {recognized}")
 print(f"Unrecognized user utterances: {unrecognized}")
 print(f"Accuracy: {recognized/(recognized+unrecognized)}")
-
-
-precision_per_class = {}
-recall_per_class = {}
-
-for act in acts_recognized.keys():
-    true_positives = acts_recognized[act]
-    false_negatives = acts_not_recognized[act]
-    false_positives = recognized - true_positives
-
-    precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) != 0 else 0
-    recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) != 0 else 0
-
-    precision_per_class[act] = precision
-    recall_per_class[act] = recall
-
-average_precision = sum(precision_per_class.values()) / len(precision_per_class)
-average_recall = sum(recall_per_class.values()) / len(recall_per_class)
-
-print("\nPrecision per class:")
-for act, precision in precision_per_class.items():
-    print(f"{act}: {precision}")
-
-print("\nRecall per class:")
-for act, recall in recall_per_class.items():
-    print(f"{act}: {recall}")
-
-print(f"\nAverage Precision: {average_precision}")
-print(f"Average Recall: {average_recall}")