2020-04-03 13:22:58 +02:00
|
|
|
#!/usr/bin/env bash
#
# Scores the hypothesis text in wikiniews_results.tsv against its reference
# text with sclite and records the resulting error rates.
#
# Reads:   wikiniews_results.tsv (column 2 is written to hyp.trn, column 3 to
#          ref.trn)
# Writes:  hyp.trn, ref.trn, wer_srr.txt, wer_total.txt, wer_all.txt and
#          wikinews_results.tsv; appends to wer.txt and srr.txt, which are
#          then trimmed to their last 20 lines
# Removes: wikiniews_results.tsv
# Needs:   sclite, python and sponge on PATH, in addition to the standard
#          text utilities
|
|
|
|
# Build the hypothesis transcript for sclite: take column 2 of the results
# table and append a tab followed by an utterance id "(sp1_<line number>)".
cut -f2 wikiniews_results.tsv \
  | awk '{ printf "%s\t(sp1_%d)\n", $0, NR }' > hyp.trn
|
|
|
|
# Build the reference transcript for sclite: take column 3 of the results
# table and append a tab followed by an utterance id "(sp1_<line number>)".
cut -f3 wikiniews_results.tsv \
  | awk '{ printf "%s\t(sp1_%d)\n", $0, NR }' > ref.trn
|
2020-04-03 13:32:59 +02:00
|
|
|
# Score hyp.trn against ref.trn with sclite's summary report, keep only the
# lines containing "Sum/Avg", and store the last two decimal numbers found in
# that text, tab-separated, in wer_srr.txt.
# NOTE(review): presumably these two numbers are the word error rate and the
# sentence error rate columns of the report - confirm against sclite's layout.
sclite -f 0 -r ref.trn trn -h hyp.trn trn -e utf-8 -i rm -o sum stdout \
  | grep "Sum/Avg" \
  | python -c "import sys; import re; print('\t'.join(re.findall(r'(\d+\.\d+)', sys.stdin.read())[-2:]))" \
  > wer_srr.txt
|
2020-04-03 13:22:58 +02:00
|
|
|
# Store the first tab-separated field of wer_srr.txt in wer_total.txt,
# overwriting any previous content.
awk -F'\t' '{ print $1 }' wer_srr.txt > wer_total.txt
|
2020-04-03 14:00:11 +02:00
|
|
|
# Append the second tab-separated field of wer_srr.txt to the srr.txt history.
# A line that contains no tab is passed through whole, as `cut -f2` does.
awk -F'\t' '{ print (NF > 1 ? $2 : $0) }' wer_srr.txt >> srr.txt
|
2020-04-03 13:22:58 +02:00
|
|
|
|
|
|
|
#######################################
# Convert sclite per-utterance alignment ("pra") output into word error rates.
# For every input line containing "Scores:", the prefix
# "Scores: (#C #S #D #I) " is stripped and the four remaining counts are read
# as correct (C), substitutions (S), deletions (D) and insertions (I).
# WER = (S + D + I) / (S + D + C).
# When the reference length S + D + C is zero the ratio is undefined (and awk
# would abort on the division); by convention 1 is printed if there are
# insertions and 0 otherwise, so that one line per utterance is always emitted.
# Arguments: none (reads stdin)
# Outputs:   one WER value per "Scores:" line on stdout
#######################################
pra_scores_to_wer() {
  awk '
    /Scores:/ {
      sub(/Scores: \(#C #S #D #I\) /, "")
      c = $1; s = $2; d = $3; i = $4
      # Plain variables here: "$s" would mean "field number s", not s itself.
      ref_len = s + d + c
      if (ref_len > 0) {
        wer = (s + d + i) / ref_len
      } else {
        wer = (i > 0) ? 1 : 0
      }
      print wer
    }
  '
}

# Score hyp.trn against ref.trn utterance by utterance and write one WER per
# utterance to wer_all.txt.
sclite -f 0 -r ref.trn trn -h hyp.trn trn -e utf-8 -i rm -o pra stdout \
  | pra_scores_to_wer > wer_all.txt
|
2020-04-03 14:00:11 +02:00
|
|
|
#######################################
# Print the arithmetic mean of the first field of every line read on stdin.
# Prints nothing (and succeeds) when the input is empty, instead of aborting
# with awk's fatal "division by zero" error.
# Arguments: none (reads stdin)
# Outputs:   the mean on stdout, or nothing for empty input
#######################################
mean_of_first_column() {
  awk '{ sum += $1; n++ } END { if (n > 0) print sum / n }'
}

# Append the average of the per-utterance values in wer_all.txt to the
# wer.txt history.
mean_of_first_column < wer_all.txt >> wer.txt
|
|
|
|
|
2020-04-03 13:28:40 +02:00
|
|
|
# Write the results table to wikinews_results.tsv with the lines of wer.txt
# joined on, line by line, as an extra tab-separated column.
# NOTE(review): wer.txt holds the appended history of average values, while
# wer_all.txt holds one value per utterance - confirm wer.txt is the intended
# input here.
paste -d '\t' wikiniews_results.tsv wer.txt > wikinews_results.tsv
|
2020-04-03 13:22:58 +02:00
|
|
|
# Delete the original input table; its content now lives in
# wikinews_results.tsv.
rm -- wikiniews_results.tsv
|
2020-04-03 14:00:11 +02:00
|
|
|
|
|
|
|
# Keep only the 20 most recent entries of wer.txt. sponge collects all of its
# input before writing, which is what allows rewriting the file being read.
tail -n 20 wer.txt \
  | sponge wer.txt
|
|
|
|
# Keep only the 20 most recent entries of srr.txt. sponge collects all of its
# input before writing, which is what allows rewriting the file being read.
tail -n 20 srr.txt \
  | sponge srr.txt
|