2020-04-21 22:40:42 +02:00
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
# Prepare hypothesis file based on column 2
|
|
|
|
# The hypothesis text is the 2nd tab-delimited field of every results row.
cut -d $'\t' -f 2 -- 'wikiniews_results.tsv' >hypothesis.txt
|
|
|
|
# Prepare reference file based on column 3
|
|
|
|
# The reference text is the 3rd tab-delimited field of every results row.
cut -d $'\t' -f 3 -- 'wikiniews_results.tsv' >reference.txt
|
|
|
|
|
|
|
|
# Use awk to convert the .txt files to .trn by appending an ID "(sp1_<line number>)" to each line
|
2020-04-21 22:53:15 +02:00
|
|
|
# Append a tab and the ID "(sp1_<line number>)" to every reference line.
awk '{ printf "%s\t(sp1_%d)\n", $0, NR }' <reference.txt >reference.trn
|
|
|
|
# Append a tab and the ID "(sp1_<line number>)" to every hypothesis line.
awk '{ printf "%s\t(sp1_%d)\n", $0, NR }' <hypothesis.txt >hypothesis.trn
|
2020-04-21 23:09:17 +02:00
|
|
|
|
2020-04-22 12:45:54 +02:00
|
|
|
# Use sclite to calculate WER; only the output lines containing 'Scores:' are needed
|
|
|
|
# Align hypotheses against references with sclite (report written to stdout)
# and keep only the lines of that report that contain "Scores:".
sclite \
  -f 0 \
  -r reference.trn trn \
  -h hypothesis.trn trn \
  -e utf-8 \
  -i rm \
  -o all stdout \
  | grep 'Scores:' >wer_results.txt
|
2020-04-22 13:29:35 +02:00
|
|
|
|
2020-04-22 13:37:00 +02:00
|
|
|
# Convert "Scores:" lines (stdin) into one WER percentage per line (stdout).
#
# WER = (S + D + I) / (C + S + D) * 100, using whitespace-split fields
# $6=C (correct), $7=S (substitutions), $8=D (deletions), $9=I (insertions).
# NOTE(review): assumes sclite's "Scores: (#C #S #D #I) c s d i" layout —
# confirm against the sclite version in use.
#
# A line whose reference length (C + S + D) is zero yields "NaN" instead of
# aborting awk with a division-by-zero error, so the output keeps exactly one
# row per input row and stays aligned with the results file.
scores_to_wer() {
  awk '{
    errors    = $7 + $8 + $9
    ref_words = $6 + $7 + $8
    if (ref_words > 0)
      print (errors / ref_words) * 100
    else
      print "NaN"
  }'
}

# Truncate (">") rather than append, like every other step of this script,
# so that re-running does not accumulate stale rows in wer_per_line.txt.
scores_to_wer <wer_results.txt >wer_per_line.txt
|
2020-04-22 13:29:35 +02:00
|
|
|
# Print each WER value side by side with the matching results row,
# joined by a tab, on standard output.
paste -d '\t' wer_per_line.txt wikiniews_results.tsv
|
2020-04-22 13:37:00 +02:00
|
|
|
|
|
|
|
# calculate mean WER for all records
|
|
|
|
# Print the arithmetic mean of the numeric values in the first field of stdin.
#
# Lines whose first field is not a plain number (blank lines, "NaN", ...) are
# skipped so they neither distort the sum nor inflate the count.
# Returns non-zero and prints a diagnostic on stderr when there is nothing to
# average, instead of dividing by zero.
mean_wer() {
  awk '
    $1 ~ /^[-+]?([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$/ {
      sum += $1
      n++
    }
    END {
      if (n > 0) {
        print sum / n
      } else {
        print "mean_wer: no numeric WER values found" > "/dev/stderr"
        exit 1
      }
    }
  '
}

# Read the per-line file produced earlier in this script (wer_per_line.txt).
mean_wer <wer_per_line.txt >wer.txt
|