This commit is contained in:
Kuba Kowalczyk 2019-05-07 12:34:21 +02:00
parent 99a18f41b7
commit e351eb53db
4 changed files with 63 additions and 4 deletions

View File

@ -4,6 +4,7 @@ RUN apt update -y && apt install -y make
# Toolchain packages, installed one per layer.
RUN apt install -y git
RUN apt install -y gcc
# NOTE(review): presumably needed because counter.sh calls gensub(), which is
# a gawk extension -- confirm that counter.sh runs inside this image.
RUN apt install -y gawk
# NOTE(review): presumably the interpreter for extract.py (run from
# counter.sh); extract.py passes encoding= to open(), which needs Python 3 --
# confirm which major version this package provides on the base image.
RUN apt install -y python
# Prints the compiler version into the build log.
RUN gcc --version
RUN apt install -y build-essential
# Fetches the NIST SCTK sources; presumably the origin of the sclite tool that
# counter.sh invokes -- the build step itself is outside this view.
RUN git clone https://github.com/usnistgov/SCTK.git

7
Jenkinsfile vendored
View File

@ -7,9 +7,10 @@ pipeline {
checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: '8cdbf19a-e62d-4807-9112-bc7a8d225074', url: 'https://git.wmi.amu.edu.pl/s416094/s416094-mlworkshops']]])
sh label: '', script: 'chmod 777 counter.sh'
sh label: '', script: './counter.sh'
archiveArtifacts 'hypothesis.trn'
archiveArtifacts 'reference.trn'
archiveArtifacts 'results.txt'
archiveArtifacts 'wikiniews_results.tsv'
archiveArtifacts 'wer_avg.txt'
archiveArtifacts 'srr.txt'
archiveArtifacts 'wer_srr.txt'
}
}
}

View File

@ -2,3 +2,5 @@
# Build the reference transcript: for every tab-separated row of
# wikiniews_results.tsv, emit column 2 followed by a tab and a "(sp1_<row>)"
# id, where <row> is awk's record number. gensub() rewrites a leading "**" in
# the text (it appears to prefix it with a backslash -- verify the escaping).
awk 'BEGIN { FS="\t" } {print gensub("^\\*\\*", "\\\\**", "g", $2) "\t(sp1_"NR")"}' wikiniews_results.tsv > reference.trn
# Same transformation for column 3, which becomes the hypothesis transcript.
awk 'BEGIN { FS="\t" } {print gensub("^\\*\\*", "\\\\**", "g", $3) "\t(sp1_"NR")"}' wikiniews_results.tsv > hypothesis.trn
# Score hypothesis against reference with sclite. The report is APPENDED (>>)
# to results.txt, so output left over from an earlier run stays in the file.
sclite -f 0 -r reference.trn trn -h hypothesis.trn trn -e utf-8 -i rm -o all stdout >> results.txt
# Post-process the sclite report (reads results.txt and wikiniews_results.tsv).
python extract.py
# Replace the input TSV with results.tsv; this step fails if extract.py did
# not leave a file with exactly that name in the working directory.
mv results.tsv wikiniews_results.tsv

55
extract.py Normal file
View File

@ -0,0 +1,55 @@
import re
import fileinput
import csv
# Post-processes an sclite report: reads the per-sentence "Scores:" lines from
# results.txt, derives the word error rate (WER) of every sentence plus the
# sentence recognition rate (SRR), writes the summaries to wer_avg.txt,
# srr.txt and wer_srr.txt, and appends each sentence's WER as a new last
# column of wikiniews_results.tsv (written out as results.tsv).

# Matches every unsigned integer on a "Scores:" line; compiled once because it
# is applied to each matching line of the report.
_COUNT_RE = re.compile(r"\d+")


def _parse_scores(line):
    """Return (correct, substitutions, deletions, insertions) from a Scores line.

    The first four integers found on the line are taken, in that order.
    Raises ValueError when the line holds fewer than four integers.
    """
    correct, substitutions, deletions, insertions = (
        int(token) for token in _COUNT_RE.findall(line)[:4]
    )
    return correct, substitutions, deletions, insertions


def _word_error_rate(correct, substitutions, deletions, insertions):
    """Return (S + D + I) / (S + D + C) for one sentence.

    The denominator is the number of reference words. For an empty reference
    the ratio is undefined, so the sentence counts as fully wrong (1.0) when
    the hypothesis inserted words and as error-free (0.0) otherwise; this
    keeps one value per sentence so the TSV rows stay aligned.
    """
    reference_words = substitutions + deletions + correct
    if reference_words == 0:
        return 1.0 if insertions else 0.0
    return (substitutions + deletions + insertions) / reference_words


def main():
    """Compute WER/SRR from results.txt and write all output files.

    Files are resolved relative to the current working directory.
    Exits with an error message (SystemExit) when the report contains no
    "Scores" lines, or fewer of them than wikiniews_results.tsv has rows.
    """
    wers = []
    exact_matches = 0
    with open("results.txt", "r", encoding="utf-8") as report:
        for line in report:
            if not line.startswith("Scores"):
                continue
            correct, substitutions, deletions, insertions = _parse_scores(line)
            print(correct, substitutions, deletions, insertions)
            wers.append(
                _word_error_rate(correct, substitutions, deletions, insertions)
            )
            if substitutions == 0 and deletions == 0 and insertions == 0:
                exact_matches += 1

    if not wers:
        raise SystemExit("extract.py: no 'Scores' lines found in results.txt")

    # SRR is the share of sentences recognised without any error, so the
    # denominator is the number of scored sentences (not the sum of the WERs).
    srr = exact_matches / len(wers)
    print(srr)
    wer_avg = sum(wers) / len(wers)
    print(wer_avg)

    srr_text = "SRR: " + str(srr)
    wer_text = "WER avg: " + str(wer_avg)
    with open("wer_avg.txt", "w") as wer_avg_file:
        wer_avg_file.write(wer_text)
    with open("srr.txt", "w") as srr_file:
        srr_file.write(srr_text)
    with open("wer_srr.txt", "w") as wer_srr:
        wer_srr.write(srr_text + "\n" + wer_text)

    # newline="" lets the csv module handle line endings itself.
    with open("wikiniews_results.tsv", "r", encoding="utf-8", newline="") as tsv_in:
        rows = list(csv.reader(tsv_in, delimiter="\t"))
    if len(rows) > len(wers):
        raise SystemExit(
            "extract.py: wikiniews_results.tsv has %d rows but results.txt "
            "only holds %d 'Scores' lines" % (len(rows), len(wers))
        )
    # The n-th Scores line belongs to the n-th TSV row.
    for row, wer in zip(rows, wers):
        row.append(wer)

    # counter.sh moves "results.tsv" over wikiniews_results.tsv afterwards, so
    # the output must carry exactly that name.
    with open("results.tsv", "w", encoding="utf-8", newline="") as tsv_out:
        writer = csv.writer(tsv_out, delimiter="\t", lineterminator="\n")
        writer.writerows(rows)


if __name__ == "__main__":
    main()