metryki10
This commit is contained in:
parent
0619df827c
commit
2eaf6584d1
@ -4,6 +4,7 @@ RUN apt update -y && apt install -y make
|
|||||||
RUN apt install -y git
|
RUN apt install -y git
|
||||||
RUN apt install -y gcc
|
RUN apt install -y gcc
|
||||||
RUN apt install -y gawk
|
RUN apt install -y gawk
|
||||||
|
RUN apt install -y python3
|
||||||
RUN gcc --version
|
RUN gcc --version
|
||||||
RUN apt install -y build-essential
|
RUN apt install -y build-essential
|
||||||
RUN git clone https://github.com/usnistgov/SCTK.git
|
RUN git clone https://github.com/usnistgov/SCTK.git
|
||||||
|
2
Jenkinsfile
vendored
2
Jenkinsfile
vendored
@ -11,5 +11,7 @@ node{
|
|||||||
sh 'awk -f file3.awk < reference.txt > reference.trn'
|
sh 'awk -f file3.awk < reference.txt > reference.trn'
|
||||||
sh 'awk -f file3.awk < hypothesis.txt > hypothesis.trn'
|
sh 'awk -f file3.awk < hypothesis.txt > hypothesis.trn'
|
||||||
sh 'sclite -f 0 -r reference.trn trn -h hypothesis.trn trn -e utf-8 -i rm -o all -p > tmp_metrics'
|
sh 'sclite -f 0 -r reference.trn trn -h hypothesis.trn trn -e utf-8 -i rm -o all -p > tmp_metrics'
|
||||||
|
sh 'python3 ./xml_parser.py'
|
||||||
|
sh 'paste wikiniews_results.tsv WERs > wikiniews_results_with_WERs.tsv'
|
||||||
}
|
}
|
||||||
}
|
}
|
37
parser.py
Normal file
37
parser.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Pattern matching an alignment line that starts with "C".  With
# re.MULTILINE, "^" matches at the start of every line of the element text.
# NOTE(review): presumably "C" marks a correctly recognised word in the
# sclite alignment output -- confirm against the sclite format.
correct_word_pattern = r'^C'
correct_word_regex = re.compile(correct_word_pattern, flags=re.MULTILINE)


def count_correct_words(alignment_text):
    """Return how many lines of *alignment_text* start with ``C``.

    An element without any text has ``text is None``; that is treated as
    zero matches instead of letting ``findall`` raise ``TypeError``.
    """
    if not alignment_text:
        return 0
    return len(correct_word_regex.findall(alignment_text))


def sentence_score(correct, all_words):
    """Return the fraction ``correct / all_words`` for one sentence.

    A sentence with ``all_words == 0`` has nothing that could be wrong, so
    it scores ``1.0``.  This agrees with the "fully correct" test
    (``correct == all_words``) and avoids a ``ZeroDivisionError``.
    """
    if all_words == 0:
        return 1.0
    return correct / all_words


def summarize(speaker):
    """Score every child element of *speaker*.

    Each child must carry an integer ``word_cnt`` attribute; its text is
    scanned for lines starting with ``C``.

    Returns:
        A tuple ``(scores, score_sum, fully_correct)`` where ``scores`` is
        the list of per-sentence fractions in document order, ``score_sum``
        is their running sum and ``fully_correct`` is the number of
        sentences in which every word was counted as correct.

    Raises:
        KeyError: if a child has no ``word_cnt`` attribute.
        ValueError: if ``word_cnt`` is not an integer.
    """
    scores = []
    # Accumulated with plain left-to-right addition so the written digits
    # do not depend on how the built-in sum() rounds floats.
    score_sum = 0
    fully_correct = 0
    for child in speaker:
        all_words = int(child.attrib["word_cnt"])
        correct = count_correct_words(child.text)
        score = sentence_score(correct, all_words)
        scores.append(score)
        score_sum = score_sum + score
        if correct == all_words:
            fully_correct = fully_correct + 1
    return scores, score_sum, fully_correct


def main(metrics_path='tmp_metrics', partial_path="WERs",
         statistics_path="statistics"):
    """Parse the XML metrics file and write per-sentence and overall scores.

    Reads *metrics_path*, takes the first child of the XML root as the
    speaker element and scores each of its children.

    Writes:
        *partial_path*: one score per line, in document order.
        *statistics_path*: a ``WER:`` line holding the mean score and an
        ``SRR:`` line holding the share of fully correct sentences.

    NOTE(review): the value labelled ``WER`` is the mean fraction of
    *correct* words (higher is better), not an error rate -- confirm that
    the label is intended before relying on it.

    Raises:
        ValueError: if the speaker element contains no sentences, because
            the averages would be undefined.
    """
    tree = ET.parse(metrics_path)
    root = tree.getroot()
    speaker = root[0]

    scores, score_sum, fully_correct = summarize(speaker)
    count = len(scores)
    if count == 0:
        raise ValueError(
            "no sentences found in " + repr(metrics_path)
            + "; cannot compute statistics"
        )

    # Context managers guarantee both files are flushed and closed even if
    # a write fails part-way through.
    with open(partial_path, 'w', encoding="utf-8") as partial_outputs:
        partial_outputs.writelines(str(score) + '\n' for score in scores)

    with open(statistics_path, 'w', encoding="utf-8") as statistics:
        statistics.write("WER: " + str(score_sum / count) + '\n')
        statistics.write("SRR: " + str(fully_correct / count))


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user