Add per-sentence WER and overall SRR reporting to the sclite pipeline.

- Dockerfile: install python3 so the metrics script can run in the image.
- Jenkinsfile: archive wer.txt and srr.txt next to results.txt.
- count_metrics.py: new script that reads the sclite report and writes
  wer.txt (one value per 'Scores:' line) and srr.txt. It no longer divides
  by zero for an empty reference or a report without scored sentences.
- script.sh: run the script and paste wer.txt onto results.txt.

NOTE(review): this patch was restored from a whitespace-collapsed copy.
Leading whitespace of the Jenkinsfile context lines is reconstructed;
confirm against the tree (or apply with --ignore-whitespace). The blob id
on the count_metrics.py index line predates the edits to that file.

diff --git a/Dockerfile b/Dockerfile
index bf3941c..0fc87fc 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,6 +3,7 @@ FROM ubuntu:latest
 RUN apt update -y && apt install -y make
 RUN apt install -y git
 RUN apt install -y gcc
+RUN apt install -y python3
 RUN gcc --version
 RUN apt install -y build-essential
 RUN git clone https://github.com/usnistgov/SCTK.git
diff --git a/Jenkinsfile b/Jenkinsfile
index 5352e3d..a283f2a 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -18,7 +18,7 @@ pipeline {
         }
         stage('archiveArtifacts') {
             steps {
-                archiveArtifacts 'results.txt'
+                archiveArtifacts 'results.txt, wer.txt, srr.txt'
             }
         }
     }
diff --git a/count_metrics.py b/count_metrics.py
new file mode 100755
index 0000000..b386e63
--- /dev/null
+++ b/count_metrics.py
@@ -0,0 +1,44 @@
+"""Derive per-sentence WER and overall SRR from an sclite report.
+
+Usage: python3 count_metrics.py <sclite_report>
+
+Writes one WER per scored sentence to wer.txt and the share of
+error-free sentences (sentence recognition rate) to srr.txt.
+"""
+import sys
+
+
+def sentence_wer(scores_line):
+    """Return the WER for one 'Scores:' line of an sclite report.
+
+    The last four fields are read as correct, substitution, deletion
+    and insertion counts, in that order.
+    """
+    c, s, d, i = map(int, scores_line.split()[-4:])
+    # An empty reference would divide by zero; count it as one word.
+    return (s + d + i) / max(s + d + c, 1)
+
+
+if __name__ == '__main__':
+    number_of_sentence = 0
+    correct_sentences = 0
+
+    with open(sys.argv[1], 'r', encoding='utf-8') as sclite_result, \
+         open('wer.txt', 'w') as wer, \
+         open('srr.txt', 'w') as srr:
+
+        for line in sclite_result:
+            if not line.startswith('Scores:'):
+                continue
+            number_of_sentence += 1
+            word_error_rate = sentence_wer(line)
+            wer.write(f'{word_error_rate:.5f}\n')
+            if word_error_rate == 0:
+                correct_sentences += 1
+
+        # A report without scored sentences yields an SRR of 0.
+        if number_of_sentence:
+            sentence_recognition_rate = correct_sentences / number_of_sentence
+        else:
+            sentence_recognition_rate = 0.0
+        srr.write(f'{sentence_recognition_rate:.5f}\n')
diff --git a/script.sh b/script.sh
index b48be25..2c0e2ad 100755
--- a/script.sh
+++ b/script.sh
@@ -4,3 +4,5 @@
 cut -f2 $1 | awk 'BEGIN{FS=OFS="\t"}{print $0,"(sp1_"NR")"}' > hypothesis.trn
 cut -f3 $1 | awk 'BEGIN{FS=OFS="\t"}{print $0,"(sp1_"NR")"}' > reference.trn
 sclite -f 0 -r reference.trn trn -h hypothesis.trn trn -e utf-8 -i rm -o all stdout > results.txt
+python3 count_metrics.py results.txt
+paste results.txt wer.txt > tmp.txt && mv tmp.txt results.txt