Added parser and called it in script.sh
This commit is contained in:
parent
e5db46805c
commit
65718229aa
@ -3,3 +3,6 @@ awk -f get_3rd.awk < wikiniews_results.tsv > references.txt
|
||||
# Convert the plain-text references and hypotheses into the .trn files that
# are handed to sclite below. Stop on failure so later steps never consume a
# missing or truncated file.
awk -f txt_to_tsr_converter.awk < references.txt > references.trn || exit
awk -f txt_to_tsr_converter.awk < hypotheses.txt > hypotheses.trn || exit

# Score the hypotheses against the references; sclite's stdout is captured in
# tmp_metrics, which xml_parser.py parses next.
sclite -f 0 -r references.trn trn -h hypotheses.trn trn -e utf-8 -i rm -o all -p > tmp_metrics || exit

# xml_parser.py reads tmp_metrics and writes one score per line to "WERs"
# plus the aggregate figures to "statistics".
python3 ./xml_parser.py || exit

# Append the per-line scores as an extra column of the results table.
paste wikiniews_results.tsv WERs > wikiniews_results_with_WERs.tsv
|
37
xml_parser.py
Normal file
37
xml_parser.py
Normal file
@ -0,0 +1,37 @@
|
||||
import xml.etree.ElementTree as ET
|
||||
import re
|
||||
|
||||
def compute_metrics(metrics_path='tmp_metrics', wers_path="WERs",
                    stats_path="statistics"):
    """Derive per-sentence and aggregate scores from an alignment XML file.

    The first child of the XML root is taken as the speaker element. For each
    of its children, the number of text lines starting with ``C`` is divided
    by the element's ``word_cnt`` attribute.

    NOTE(review): the aggregate is labelled "WER", but the value computed is
    the fraction of *correct* words, not an error rate. The label and file
    names are kept unchanged because script.sh consumes them -- confirm intent.

    Args:
        metrics_path: XML file to parse.
        wers_path: output file receiving one ratio per line, in input order.
        stats_path: output file receiving the "WER: " and "SRR: " lines.

    Returns:
        Tuple ``(mean_ratio, srr)`` with the two values written to
        ``stats_path``. Both are ``nan`` when the speaker element is empty.

    Raises:
        Whatever ``ET.parse`` raises for an unreadable or malformed file;
        ``KeyError`` / ``ValueError`` for a missing or non-integer
        ``word_cnt`` attribute.
    """
    speaker = ET.parse(metrics_path).getroot()[0]
    correct_word_regex = re.compile(r'^C', flags=re.MULTILINE)

    # Compute everything before touching the output files so a bad input
    # element cannot leave half-written results behind.
    ratios = []
    fully_correct = 0
    for child in speaker:
        all_words = int(child.attrib["word_cnt"])
        # An element without text has child.text == None; treat it as empty.
        correct = len(correct_word_regex.findall(child.text or ""))
        if all_words == 0:
            # Avoid dividing by zero: an entry with no words scores 0.0 and
            # is not counted as a fully correct sentence.
            ratios.append(0.0)
            continue
        ratios.append(correct / all_words)
        if correct == all_words:
            fully_correct += 1

    count = len(ratios)
    if count:
        # Plain left-to-right accumulation keeps the floating-point result
        # identical to a running sum over the elements.
        ratio_sum = 0
        for ratio in ratios:
            ratio_sum = ratio_sum + ratio
        mean_ratio = ratio_sum / count
        srr = fully_correct / count
    else:
        # No sentences at all: the averages are undefined.
        mean_ratio = srr = float("nan")

    # Context managers guarantee both files are flushed and closed.
    with open(wers_path, 'w') as partial_outputs:
        for ratio in ratios:
            partial_outputs.write(str(ratio) + '\n')

    with open(stats_path, 'w') as statistics:
        statistics.write("WER: " + str(mean_ratio) + '\n')
        statistics.write("SRR: " + str(srr))

    return mean_ratio, srr


def main():
    """Run with the fixed file names that script.sh expects."""
    compute_metrics()


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user