s407608-mlworkshops/evaluation.sh
2019-04-26 15:35:03 +02:00

27 lines
869 B
Bash

# Score hypothesis transcripts against reference transcripts with sclite.
#
# Usage: evaluation.sh RESULTS_TSV
#
# RESULTS_TSV is a tab-separated file, one utterance per line; column 2 is
# taken as the reference text and column 3 as the hypothesis text.
#
# Files written to the current directory:
#   reference.txt, hypothesis.txt  columns 2 and 3 of the input
#   reference.trn, hypothesis.trn  same text with a "(sp1_<line>)" id appended
#   sclite_results.txt             full sclite report
#   to_wer.txt                     per-utterance "#C #S #D #I" counts
#   wer.txt                        per-utterance word error rate
#   wikiniews_results2.tsv         input file with the WER appended as a column
#   metric_aggregated.txt          line 1: mean WER; line 2: share of
#                                  utterances with no S/D/I errors (SRR)
set -eu

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Append a tab and a unique "(sp1_<line number>)" utterance id to every line
# of stdin, so reference and hypothesis lines are paired by line number.
add_utterance_ids() {
  awk 'BEGIN { OFS = "\t" } { print $0, "(sp1_" NR ")" }'
}

[ "$#" -ge 1 ] || die "usage: ${0##*/} RESULTS_TSV"
file1=$1
[ -r "$file1" ] || die "cannot read input file: $file1"
command -v sclite >/dev/null 2>&1 || die "sclite not found in PATH"

cut -f2 -- "$file1" > reference.txt
cut -f3 -- "$file1" > hypothesis.txt
add_utterance_ids < reference.txt > reference.trn
add_utterance_ids < hypothesis.txt > hypothesis.trn

# results of S C I D
sclite -f 0 -r reference.trn trn -h hypothesis.trn trn -e utf-8 -i rm -o all stdout > sclite_results.txt \
  || die "sclite failed; see sclite_results.txt"

# only numbers: keep what follows each "Scores: (#C #S #D #I) " header
grep -oP '(?<=Scores: \(#C #S #D #I\) ).*' sclite_results.txt > to_wer.txt \
  || die "no per-utterance scores found in sclite_results.txt"

# paste pairs lines purely by position, so a count mismatch means the WER
# column would be attached to the wrong utterances; make that visible.
input_lines=$(awk 'END { print NR }' < "$file1")
score_lines=$(awk 'END { print NR }' < to_wer.txt)
if [ "$input_lines" -ne "$score_lines" ]; then
  printf 'warning: %s input lines but %s score lines; results may be misaligned\n' \
    "$input_lines" "$score_lines" >&2
fi

# calculate WER per utterance: (S + D + I) / (C + S + D).
# Fields of to_wer.txt are $1=C $2=S $3=D $4=I. An empty reference gives a
# zero denominator, which would abort awk; by convention report 0 when the
# hypothesis is empty as well and 1 when it contains insertions.
awk '{
  errors = $2 + $3 + $4
  ref_words = $1 + $2 + $3
  if (ref_words > 0)
    print errors / ref_words
  else
    print (errors > 0 ? 1 : 0)
}' to_wer.txt > wer.txt

# paste results
paste -- "$file1" wer.txt > wikiniews_results2.tsv

# calculate average WER (guarded against an empty wer.txt)
awk '{ total += $1; count++ } END { print (count ? total / count : 0) }' wer.txt > metric_aggregated.txt

# calculate SRR: share of utterances with no substitutions, deletions or insertions
awk '{ count++; if ($2 == 0 && $3 == 0 && $4 == 0) correct += 1 }
     END { print (count ? correct / count : 0) }' to_wer.txt >> metric_aggregated.txt