diff --git a/Dockerfile b/Dockerfile
index 9752dd3..1385dd7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,6 +4,7 @@ RUN apt update -y && apt install -y make
 RUN apt install -y git
 RUN apt install -y gcc
 RUN apt install -y gawk
+RUN apt install -y python3
 RUN gcc --version
 RUN apt install -y build-essential
 RUN git clone https://github.com/usnistgov/SCTK.git
diff --git a/Jenkinsfile b/Jenkinsfile
index fe9c05b..4694405 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -7,9 +7,10 @@ pipeline {
                 checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: '8cdbf19a-e62d-4807-9112-bc7a8d225074', url: 'https://git.wmi.amu.edu.pl/s416094/s416094-mlworkshops']]])
                 sh label: '', script: 'chmod 777 counter.sh'
                 sh label: '', script: './counter.sh'
-                archiveArtifacts 'hypothesis.trn'
-                archiveArtifacts 'reference.trn'
-                archiveArtifacts 'results.txt'
+                archiveArtifacts 'wikiniews_results.tsv'
+                archiveArtifacts 'wer_avg.txt'
+                archiveArtifacts 'srr.txt'
+                archiveArtifacts 'wer_srr.txt'
             }
         }
     }
diff --git a/counter.sh b/counter.sh
index fbcd2d8..2feed96 100644
--- a/counter.sh
+++ b/counter.sh
@@ -1,4 +1,6 @@
 awk 'BEGIN { FS="\t" } {print gensub("^\\*\\*", "\\\\**", "g", $2) "\t(sp1_"NR")"}' wikiniews_results.tsv > reference.trn
 awk 'BEGIN { FS="\t" } {print gensub("^\\*\\*", "\\\\**", "g", $3) "\t(sp1_"NR")"}' wikiniews_results.tsv > hypothesis.trn
 
-sclite -f 0 -r reference.trn trn -h hypothesis.trn trn -e utf-8 -i rm -o all stdout >> results.txt
\ No newline at end of file
+sclite -f 0 -r reference.trn trn -h hypothesis.trn trn -e utf-8 -i rm -o all stdout >> results.txt
+python3 extract.py
+mv results.tsv wikiniews_results.tsv
\ No newline at end of file
diff --git a/extract.py b/extract.py
new file mode 100644
index 0000000..4aaa5ab
--- /dev/null
+++ b/extract.py
@@ -0,0 +1,55 @@
+"""Compute per-sentence WER, average WER and SRR from an sclite report.
+
+Reads results.txt (sclite output), writes wer_avg.txt, srr.txt and
+wer_srr.txt, and writes results.tsv: the rows of wikiniews_results.tsv with
+each row's WER appended as an extra column.
+"""
+import re
+import fileinput
+import csv
+
+wers = []
+correct_sentences = 0
+with open("results.txt", "r", encoding="utf-8") as report:
+    for line in report:
+        if not line.startswith("Scores"):
+            continue
+        # The first four integers on a "Scores" line are taken as
+        # correct, substitutions, deletions and insertions, in that order.
+        correct, substitutions, deletes, inserts = (
+            int(n) for n in re.findall(r"\d+", line)[:4]
+        )
+        errors = substitutions + deletes + inserts
+        wer = errors / (substitutions + deletes + correct)
+        print(correct, substitutions, deletes, inserts)
+        wers.append(wer)
+        if errors == 0:
+            correct_sentences += 1
+
+if not wers:
+    raise SystemExit("extract.py: no 'Scores' lines found in results.txt")
+
+# SRR is the share of sentences recognised without any error, so the
+# denominator is the number of sentences, not the sum of the WER values.
+srr = correct_sentences / len(wers)
+print(srr)
+
+wer_avg = sum(wers) / len(wers)
+print(wer_avg)
+
+with open("wer_avg.txt", "w") as wer_avg_file:
+    wer_avg_file.write("WER avg: " + str(wer_avg))
+with open("srr.txt", "w") as srr_file:
+    srr_file.write("SRR: " + str(srr))
+with open("wer_srr.txt", "w") as wer_srr_file:
+    wer_srr_file.write("SRR: " + str(srr) + "\n" + "WER avg: " + str(wer_avg))
+
+# counter.sh renames results.tsv to wikiniews_results.tsv after this script
+# runs, so the output file must be named exactly "results.tsv".
+with open("wikiniews_results.tsv", "r", encoding="utf-8") as tsv_in, \
+        open("results.tsv", "w", encoding="utf-8") as tsv_out:
+    reader = csv.reader(tsv_in, delimiter="\t")
+    writer = csv.writer(tsv_out, delimiter="\t", lineterminator="\n")
+    # Index into wers (instead of zip) so a TSV with more rows than the
+    # report has score lines still fails loudly with IndexError.
+    writer.writerows(row + [wers[i]] for i, row in enumerate(reader))