"""Compute per-utterance and aggregate word-correctness statistics.

Reads an XML metrics file, takes the first child of the root element as the
speaker, and scores every child element of that speaker.  Each child must
carry a ``word_cnt`` attribute; its text is scanned for lines that start
with ``C``, and each such line counts as one correct word.

Outputs:
  * one ratio ``correct / word_cnt`` per line in the per-utterance file;
  * a statistics file with the mean of those ratios (labelled ``WER``) and
    the fraction of utterances in which every word was correct (labelled
    ``SRR``).

NOTE(review): the value labelled ``WER`` is the fraction of *correct* words,
i.e. an accuracy rather than an error rate.  The label is kept unchanged so
existing consumers of the statistics file keep working -- confirm intent.
"""
import re
import xml.etree.ElementTree as ET

# A line of utterance text that begins with "C" marks one correct word.
CORRECT_WORD_REGEX = re.compile(r'^C', flags=re.MULTILINE)


def utterance_scores(speaker):
    """Yield ``(correct_count, word_count)`` for each scorable child.

    Args:
        speaker: iterable of XML elements, each with a ``word_cnt``
            attribute and (optionally) text to scan.

    Yields:
        A pair of ints per child.  Children whose ``word_cnt`` is 0 are
        skipped because their correctness ratio is undefined.

    Raises:
        KeyError: if a child has no ``word_cnt`` attribute.
        ValueError: if ``word_cnt`` is not an integer.
    """
    for utterance in speaker:
        word_count = int(utterance.attrib["word_cnt"])
        if word_count == 0:
            # Nothing to score; dividing by zero would abort the whole run.
            continue
        # An element without text has ``text is None``; treat it as empty.
        text = utterance.text or ""
        yield len(CORRECT_WORD_REGEX.findall(text)), word_count


def main(metrics_path: str = 'tmp_metrics',
         wers_path: str = "WERs",
         statistics_path: str = "statistics") -> None:
    """Score the metrics file and write the two report files.

    Args:
        metrics_path: XML file to read.
        wers_path: destination for the per-utterance ratios, one per line.
        statistics_path: destination for the aggregate ``WER`` / ``SRR``
            lines.

    Raises:
        ValueError: if the speaker element contains no scorable utterances.
            Raised before any output file is created.
    """
    speaker = ET.parse(metrics_path).getroot()[0]

    # Score everything up front so a malformed utterance cannot leave
    # half-written output files behind.
    scores = list(utterance_scores(speaker))
    if not scores:
        raise ValueError(
            "no scorable utterances found in " + repr(metrics_path))

    ratios = [correct / total for correct, total in scores]
    fully_correct = sum(1 for correct, total in scores if correct == total)
    utterance_count = len(scores)

    with open(wers_path, 'w') as partial_outputs:
        partial_outputs.writelines(str(ratio) + '\n' for ratio in ratios)

    with open(statistics_path, 'w') as statistics:
        statistics.write("WER: " + str(sum(ratios) / utterance_count) + '\n')
        statistics.write("SRR: " + str(fully_correct / utterance_count))


if __name__ == "__main__":
    main()