diff --git a/Jenkinsfile b/Jenkinsfile
index 8172ba1..ecb4ff7 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -13,5 +13,13 @@ pipeline {
                 archiveArtifacts 'line_count.txt'
             }
         }
+        stage('Calc metrics') {
+            steps {
+                sh label: '', script: './calc_metrics.sh'
+                archiveArtifacts 'wer.txt'
+                archiveArtifacts 'srr.txt'
+                archiveArtifacts 'wikinews_results.tsv'
+            }
+        }
     }
 }
\ No newline at end of file
diff --git a/calc_metrics.sh b/calc_metrics.sh
new file mode 100755
index 0000000..0fe63ce
--- /dev/null
+++ b/calc_metrics.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+#
+# Score the recognition results with sclite and derive the metric files.
+#
+# Reads:  wikiniews_results.tsv (column 2 -> hyp.trn, column 3 -> ref.trn)
+# Writes: wer_total.txt, srr.txt - the last two decimals on sclite's "Sum/Avg"
+#                                  row (presumably Err and S.Err - confirm)
+#         wer_all.txt            - (S+D+I)/(S+D+C) for every "Scores:" row
+#         wer.txt                - arithmetic mean of wer_all.txt
+#         wikinews_results.tsv   - the input with wer.txt pasted on as a column
+# Needs sclite and python3 on PATH.
+set -euo pipefail
+
+# NOTE(review): the "wikiniews" spelling is kept because the step producing
+# this file is outside this change - confirm the real name.
+readonly results_in='wikiniews_results.tsv'
+# Must match the name the Jenkinsfile's archiveArtifacts step expects.
+readonly results_out='wikinews_results.tsv'
+
+# Print column $1 of the input as sclite "trn" lines: text, tab, "(sp1_<row>)".
+make_trn() {
+  cut -f"$1" "$results_in" \
+    | awk 'BEGIN { FS = OFS = "\t" } { print $0, "(sp1_" NR ")" }'
+}
+
+# Score hyp.trn against ref.trn and print sclite report type $1 to stdout.
+run_sclite() {
+  sclite -f 0 -r ref.trn trn -h hyp.trn trn -e utf-8 -i rm -o "$1" stdout
+}
+
+make_trn 2 > hyp.trn
+make_trn 3 > ref.trn
+
+run_sclite sum \
+  | grep "Sum/Avg" \
+  | python3 -c "import sys; import re; print('\t'.join(re.findall(r'(\d+\.\d+)', sys.stdin.read())[-2:]))" \
+  > wer_srr.txt
+cut -f1 wer_srr.txt > wer_total.txt
+cut -f2 wer_srr.txt > srr.txt
+
+# Per-row error rate. Use the bare awk variables here: writing $s would mean
+# "the field whose index is s", not the substitution count itself.
+run_sclite pra \
+  | grep "Scores:" \
+  | sed 's/Scores: (#C #S #D #I) //' \
+  | awk '{ c = $1; s = $2; d = $3; i = $4; wer = (s + d + i) / (s + d + c); print wer }' \
+  > wer_all.txt
+awk '{ sum += $1; n++ } END { print sum / n }' wer_all.txt > wer.txt
+
+# NOTE(review): wer.txt holds a single value, so only the first row gets a WER
+# column; wer_all.txt may have been intended - confirm.
+# Write to a scratch file first so the result is only renamed into place after
+# the input has been read and removed.
+paste "$results_in" wer.txt > "${results_out}.tmp"
+rm -- "$results_in"
+mv -- "${results_out}.tmp" "$results_out"