#!/bin/bash
#
# Scores hypothesis text against reference text with sclite.
#
# Reads   wikiniews_results.tsv  (tab-separated; column 2 = hypothesis,
#                                 column 3 = reference)
# Writes  wikinews_results.tsv   input rows with the per-row WER (in percent)
#                                appended as a final tab-separated column
#         wer.txt                mean of the per-row WER values
#         srr.txt                fraction of rows whose WER is exactly 0
#         reference.trn,
#         hypothesis.trn         sclite "trn" inputs (left in place)
#
# Requires `sclite` on PATH. All other intermediate files live in a private
# temporary directory that is removed on every exit path.

set -euo pipefail

readonly INPUT_TSV='wikiniews_results.tsv'
readonly OUTPUT_TSV='wikinews_results.tsv'

# Prints the arguments to stderr and aborts the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Extracts one column of INPUT_TSV and writes it in trn form: the text,
# a tab, then a per-line utterance id "(sp1_<line number>)".
#   $1 - 1-based column number to extract
#   $2 - path of the .trn file to create
write_trn() {
  local -r column=$1
  local -r trn_path=$2
  cut -f"$column" -- "$INPUT_TSV" \
    | awk 'BEGIN { FS = OFS = "\t" } { print $0, "(sp1_" NR ")" }' \
      > "$trn_path"
}

[[ -r "$INPUT_TSV" ]] || die "cannot read input file: $INPUT_TSV"
command -v sclite > /dev/null || die "sclite not found in PATH"

work_dir=$(mktemp -d) || die "mktemp failed"
readonly work_dir
trap 'rm -rf -- "$work_dir"' EXIT

readonly score_lines="$work_dir/wer_results.txt"
readonly wer_per_line="$work_dir/wer_per_line.txt"

write_trn 2 hypothesis.trn
write_trn 3 reference.trn

# Only the lines containing "Scores:" are needed from the sclite report.
# NOTE(review): the row alignment below assumes sclite emits one "Scores:"
# line per utterance in the same order as the input rows - confirm.
if ! sclite -f 0 -r reference.trn trn -h hypothesis.trn trn \
    -e utf-8 -i rm -o all stdout \
    | grep 'Scores:' > "$score_lines"; then
  die "sclite failed or produced no 'Scores:' lines"
fi

# A "Scores:" line looks like "Scores: (#C #S #D #I) c s d i", so the counts
# are fields 6..9: WER% = (S + D + I) / (C + S + D) * 100.
# When the reference has no words the ratio is undefined; report 0 if there
# are no insertions either, otherwise 100, instead of letting awk abort on a
# division by zero and truncate the list.
awk '{
  correct = $6; subs = $7; dels = $8; ins = $9
  ref_words = subs + dels + correct
  if (ref_words == 0) {
    print (ins == 0 ? 0 : 100)
  } else {
    wer = ((subs + dels + ins) / ref_words) * 100
    print wer
  }
}' "$score_lines" > "$wer_per_line"

# Refuse to merge if the counts differ: rows would get the wrong WER.
input_rows=$(awk 'END { print NR }' "$INPUT_TSV")
scored_rows=$(awk 'END { print NR }' "$wer_per_line")
[[ "$input_rows" -eq "$scored_rows" ]] \
  || die "row mismatch: $input_rows input rows, $scored_rows sclite scores"

# Append each row's WER as a new trailing column. getline's return value is
# checked so a short read can never silently reuse the previous row's value.
awk -v wer_file="$wer_per_line" '{
  if ((getline wer < wer_file) <= 0) {
    wer = ""
  }
  print $0 "\t" wer
}' "$INPUT_TSV" > "$OUTPUT_TSV"

# Mean WER over all records.
awk '{ sum += $1 } END { if (NR > 0) print sum / NR }' \
  "$wer_per_line" > wer.txt

# SRR: share of records with a WER of exactly 0.
awk '$1 == 0 { exact += 1 } END { if (NR > 0) print exact / NR }' \
  "$wer_per_line" > srr.txt