Compare commits
No commits in common. "56eb2b45dadf20c59f01fd54b5a3fc74ed79f533" and "f78b47f176f8a77e9697647aacf9f42ea444616c" have entirely different histories.
56eb2b45da
...
f78b47f176
57
evaluate.py
57
evaluate.py
@ -28,8 +28,6 @@ false_negatives = 0
|
||||
|
||||
acts_recognized = defaultdict(int)
|
||||
acts_not_recognized = defaultdict(int)
|
||||
false_negatives = 0
|
||||
false_positives = 0
|
||||
|
||||
for df in data_files:
|
||||
if len(df.columns) == 3:
|
||||
@ -42,33 +40,48 @@ for df in data_files:
|
||||
user_speeches = user_speech_rows["message"]
|
||||
entries_count = len(user_speeches)
|
||||
|
||||
found_rules = user_speeches.apply(lambda x: grammar.find_matching_rules(decode_prompt(x)))
|
||||
parsed = user_speeches.apply(lambda x: bool(grammar.find_matching_rules(decode_prompt(x))))
|
||||
|
||||
parsed = user_speeches.apply(
|
||||
lambda x: bool(grammar.find_matching_rules(decode_prompt(x))))
|
||||
true_count = parsed.sum()
|
||||
false_count = len(parsed) - true_count
|
||||
recognized += true_count
|
||||
unrecognized += false_count
|
||||
|
||||
for line, rules in zip(df.iterrows(), found_rules):
|
||||
act = line[1]['act'].split('(')[0]
|
||||
if len(rules) > 0:
|
||||
recognized_act = rules[0].name
|
||||
if recognized_act in act:
|
||||
true_positives += 1
|
||||
else:
|
||||
false_positives += 1
|
||||
acts_not_recognized[act] += 1
|
||||
else:
|
||||
false_negatives += 1
|
||||
acts_not_recognized[act] += 1
|
||||
for line, correct in zip(df.iterrows(), parsed):
|
||||
acts_recognized[line[1]['act'].split('(')[0]] += int(correct)
|
||||
acts_not_recognized[line[1]['act'].split('(')[0]] += int(not(correct))
|
||||
|
||||
accuracy = recognized / (recognized + unrecognized)
|
||||
precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) != 0 else 0
|
||||
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) != 0 else 0
|
||||
|
||||
print(f"Recognized user utterances: {recognized}")
|
||||
print(f"Unrecognized user utterances: {unrecognized}")
|
||||
print(f"Accuracy: {accuracy}")
|
||||
print(f"Precision: {precision}")
|
||||
print(f"Recall: {recall}")
|
||||
print(f"Accuracy: {recognized/(recognized+unrecognized)}")
|
||||
|
||||
|
||||
precision_per_class = {}
|
||||
recall_per_class = {}
|
||||
|
||||
for act in acts_recognized.keys():
|
||||
true_positives = acts_recognized[act]
|
||||
false_negatives = acts_not_recognized[act]
|
||||
false_positives = recognized - true_positives
|
||||
|
||||
precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) != 0 else 0
|
||||
recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) != 0 else 0
|
||||
|
||||
precision_per_class[act] = precision
|
||||
recall_per_class[act] = recall
|
||||
|
||||
average_precision = sum(precision_per_class.values()) / len(precision_per_class)
|
||||
average_recall = sum(recall_per_class.values()) / len(recall_per_class)
|
||||
|
||||
print("\nPrecision per class:")
|
||||
for act, precision in precision_per_class.items():
|
||||
print(f"{act}: {precision}")
|
||||
|
||||
print("\nRecall per class:")
|
||||
for act, recall in recall_per_class.items():
|
||||
print(f"{act}: {recall}")
|
||||
|
||||
print(f"\nAverage Precision: {average_precision}")
|
||||
print(f"Average Recall: {average_recall}")
|
||||
|
Loading…
Reference in New Issue
Block a user