diff --git a/Jenkinsfile b/Jenkinsfile
index 23451e8..d176540 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -10,6 +10,11 @@ pipeline {
                 sh './script.sh'
                 //sh 'cat '
                 archiveArtifacts 'output.txt'
+                sh 'chmod 755 ./WER.sh'
+                sh './WER.sh'
+
+
+
             }
         }
     }
diff --git a/WER.sh b/WER.sh
new file mode 100644
index 0000000..c52c568
--- /dev/null
+++ b/WER.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Score the recognition results with sclite and derive WER/SRR from them.
+# Column 2 of the TSV is used as the hypothesis, column 3 as the reference.
+cut -f2 wikiniews_results.tsv > hypothesis.txt && cut -f3 wikiniews_results.tsv > reference.txt
+# Append a tab-separated id "(sp1_<line number>)" to every line (.trn files).
+awk 'BEGIN{FS=OFS="\t"}{print $0,"(sp1_"NR")"}' < hypothesis.txt > hypothesis.trn
+awk 'BEGIN{FS=OFS="\t"}{print $0,"(sp1_"NR")"}' < reference.txt > reference.trn
+sclite -f 0 -r reference.trn trn -h hypothesis.trn trn -e utf-8 -i rm -o all stdout > sclite_results.txt
+# calc_wer.py reads the sclite report from stdin, so feed it the saved file.
+python3 calc_wer.py < sclite_results.txt
+
diff --git a/calc_wer.py b/calc_wer.py
new file mode 100644
index 0000000..727b990
--- /dev/null
+++ b/calc_wer.py
@@ -0,0 +1,67 @@
+"""Compute per-sentence and corpus-level WER/SRR from sclite output.
+
+Reads an sclite report on stdin, takes the counts from every line that
+starts with "Scores", prints the corpus-level WER, inserts the
+per-sentence WER values as a column of wikiniews_results.tsv and writes
+"<srr> <total_wer>" to wer_srr.txt.
+"""
+import sys
+
+import pandas as pd
+
+
+def wer(s, d, c, i):
+    """Return the word error rate (s + d + i) / (s + d + c).
+
+    s, d, c and i are the substitution, deletion, correct and insertion
+    counts. With an empty reference (s + d + c == 0) the ratio is
+    undefined; 0.0 is returned when there are no insertions either and
+    1.0 otherwise, instead of raising ZeroDivisionError.
+    """
+    reference_len = s + d + c
+    if reference_len == 0:
+        return 1.0 if i else 0.0
+    return (s + d + i) / reference_len
+
+
+def main():
+    """Parse the sclite scores from stdin and write the WER/SRR reports."""
+    total_c = total_s = total_d = total_i = 0
+    error_free = 0  # score lines with no substitution/deletion/insertion
+    all_wers = []
+
+    for line in sys.stdin:
+        if not line.startswith('Scores'):
+            continue
+        # The counts are whitespace-separated fields 5..8 of the line, in
+        # the order correct, substitution, deletion, insertion.
+        c, s, d, i = (int(field) for field in line.split()[5:9])
+        if s == 0 and d == 0 and i == 0:
+            error_free += 1
+        total_c += c
+        total_s += s
+        total_d += d
+        total_i += i
+        all_wers.append(wer(s, d, c, i))
+
+    if not all_wers:
+        # Nothing to score: fail with a clear message instead of a
+        # ZeroDivisionError further down.
+        sys.exit("calc_wer.py: no 'Scores' lines found on stdin")
+
+    total_wer = wer(total_s, total_d, total_c, total_i)
+    print(total_wer)
+
+    # NOTE(review): read_csv treats the first row as a header and to_csv
+    # also writes the index column -- confirm both match the TSV layout.
+    df = pd.read_csv('wikiniews_results.tsv', sep='\t')
+    df.insert(4, '', all_wers)
+    df.to_csv('wikiniews_results.tsv', sep='\t')
+
+    srr = error_free / len(all_wers)
+    with open('wer_srr.txt', 'w+') as f:
+        f.write("{} {}".format(srr, total_wer))
+
+
+if __name__ == '__main__':
+    main()