# s442639-mlworkshops/xml_parser.py
#
# 38 lines
# 918 B
# Python

import xml.etree.ElementTree as ET
import re
# Matches a "C" at the start of any line of an element's text; each match is
# counted as one correctly recognized word.
correct_word_pattern = r'^C'
correct_word_regex = re.compile(correct_word_pattern, flags=re.MULTILINE)


def main(metrics_path='tmp_metrics', wers_path='WERs',
         statistics_path='statistics'):
    """Score every utterance in an XML metrics file and write the results.

    The first child of the document root is iterated; for each of its
    children the number of text lines starting with ``C`` is divided by the
    element's ``word_cnt`` attribute.  One ratio per line is written to
    *wers_path*, and the mean ratio plus the fraction of elements whose
    correct count equals ``word_cnt`` are written to *statistics_path*.

    NOTE: the value labelled "WER" is the fraction of *correct* words, not an
    error rate.  The label is kept unchanged because it is part of the
    output format.

    Args:
        metrics_path: XML file to read.
        wers_path: Output file receiving one per-utterance ratio per line.
        statistics_path: Output file receiving the two aggregate lines.

    Returns:
        A ``(wer, srr)`` tuple with the two aggregate values that were
        written to *statistics_path*.

    Raises:
        xml.etree.ElementTree.ParseError: If *metrics_path* is not valid XML.
        IndexError: If the document root has no child element.
        KeyError: If an utterance element lacks the ``word_cnt`` attribute.
        ValueError: If a ``word_cnt`` attribute is not an integer.
    """
    speaker = ET.parse(metrics_path).getroot()[0]

    count = 0
    srr_counter = 0
    # Accumulated with sequential additions (not sum()) so the printed float
    # is digit-for-digit what a plain running total produces.
    wer_sum = 0
    with open(wers_path, 'w') as partial_outputs:
        for child in speaker:
            count += 1
            all_words = int(child.attrib["word_cnt"])
            # Element.text is None for an element without text content.
            correct = len(correct_word_regex.findall(child.text or ""))
            recognized = correct == all_words
            if all_words:
                ratio = correct / all_words
            else:
                # Division is undefined for a zero-word utterance; score it
                # by whether it counts as fully recognized (1.0) or not (0.0).
                ratio = float(recognized)
            partial_outputs.write(str(ratio) + '\n')
            wer_sum += ratio
            if recognized:
                srr_counter += 1

    # With no utterances the averages are undefined; report 0.0 instead of
    # raising ZeroDivisionError and leaving a truncated statistics file.
    wer = wer_sum / count if count else 0.0
    srr = srr_counter / count if count else 0.0
    with open(statistics_path, 'w') as statistics:
        statistics.write("WER: " + str(wer) + '\n')
        statistics.write("SRR: " + str(srr))
    return wer, srr


if __name__ == "__main__":
    main()