This commit is contained in:
laskau 2019-05-16 13:50:58 +02:00
parent 36a5dad2e7
commit d490ada5af
5 changed files with 71 additions and 2 deletions

View File

@ -9,4 +9,12 @@ RUN apt install -y build-essential
# Fetch the NIST SCTK sources.
RUN git clone https://github.com/usnistgov/SCTK.git
# Absolute path so the build does not depend on the previous working directory.
# NOTE(review): assumes the clone above ran in "/", which is what the
# /SCTK/bin PATH entry below already relies on — confirm.
WORKDIR /SCTK
# Configure, build, self-test, install and generate the docs in one layer.
RUN make config && make all && make check && make install && make doc
# Put the SCTK bin directory on PATH (declared once; repeating the line would
# only append the same entry to PATH a second time).
ENV PATH=$PATH:/SCTK/bin
# Python stage that holds the project code and its pip dependencies.
# NOTE(review): FROM starts a fresh stage; nothing from earlier instructions in
# this Dockerfile is carried over unless it is copied with COPY --from —
# confirm that is intended.
FROM python:3.6.1
# Any non-empty value makes Python write stdout/stderr unbuffered.
ENV PYTHONUNBUFFERED=1
# WORKDIR creates /code when it does not exist, so no separate mkdir is needed.
WORKDIR /code
# Copy the dependency manifest on its own first so the install layer below is
# only rebuilt when requirements.txt changes.
COPY requirements.txt /code/
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir -r requirements.txt
# Copy the rest of the build context (COPY instead of ADD: plain local files,
# no archive extraction or URL fetching wanted).
COPY . /code/

10
Jenkinsfile vendored
View File

@ -21,6 +21,7 @@ pipeline {
stage('Run script') {
steps{
sh 'chmod 755 ./script.sh'
sh 'sh script.sh text.txt out.txt'
archiveArtifacts artifacts: 'out.txt'
}
@ -32,7 +33,16 @@ pipeline {
}
}
// Runs metryki.sh and stores the three files it is expected to produce
// as build artifacts.
stage('WER & SRR') {
    steps {
        // Mark the script executable, then run it through sh.
        sh(script: 'chmod 755 ./metryki.sh')
        sh(script: 'sh metryki.sh')

        // Each file is archived by its own step.
        archiveArtifacts(artifacts: 'wikiniews_results_with_wer.tsv')
        archiveArtifacts(artifacts: 'wer_mean.txt')
        archiveArtifacts(artifacts: 'srr.txt')
    }
}
}
}

48
metryki.py Normal file
View File

@ -0,0 +1,48 @@
import pandas as pd
from jiwer import wer
import numpy as np
# Number of rows whose hypothesis has a word error rate of exactly 0.
correct_answers = 0

# Load the results table. The file has no header row, so the column names are
# supplied here; empty fields stay empty strings instead of NaN and blank
# lines are not skipped.
df = pd.read_csv('./Infra/wikiniews_results.tsv',
                 sep='\t',
                 index_col=False,
                 header=None,
                 keep_default_na=False,
                 skip_blank_lines=False,
                 names=['train', 'ground_truth', 'hypothesis', 'link'])
df['WER'] = np.nan

# Preview of every column for the run log.
print(df['train'].head())
print(df['ground_truth'].head())
print(df['hypothesis'].head())
print(df['link'].head())
print(df['WER'].head())

# Score each row with jiwer's wer(ground_truth, hypothesis) and count the
# rows that were recognised without any error.
for index, row in df.iterrows():
    error_wer = wer(row['ground_truth'], row['hypothesis'])
    df.loc[index, 'WER'] = error_wer
    if error_wer == 0.0:
        correct_answers += 1
print(df['WER'])
print(correct_answers)

error_wer_mean = df['WER'].mean()
# Share of rows with WER == 0 (presumably the "SRR" metric — confirm naming).
# A table with no rows has no defined rate: report NaN, like the mean WER
# does, instead of raising ZeroDivisionError.
row_count = len(df)
srr = correct_answers / row_count if row_count else float('nan')

# save to file
df.to_csv('./wikiniews_results_with_wer.tsv', sep='\t', header=None)
with open("wer_mean.txt", "w") as wer_file:
    wer_file.write(str(error_wer_mean))
with open("srr.txt", "w") as srr_file:
    srr_file.write(str(srr))

View File

@ -1,4 +1,4 @@
#!/usr/bin/env bash
# Usage: <script> INPUT OUTPUT
#
# Copies INPUT to OUTPUT line by line, putting a backslash in front of a
# leading "**" and appending a tab plus "(sp1_<line number>)" to every line,
# then runs calculate_metrics.py.
# gensub() is a gawk extension, so awk must be GNU awk here.
# The paths are quoted so names containing spaces or glob characters work.
awk '{print gensub("^\\*\\*", "\\\\**", "g", $0) "\t(sp1_"NR")"}' < "$1" > "$2"
python3 calculate_metrics.py

3
requirements.txt Normal file
View File

@ -0,0 +1,3 @@
# Python packages imported by metryki.py.
# NOTE(review): versions are unpinned, so installs are not reproducible —
# consider pinning once known-good versions are confirmed.
jiwer
pandas
numpy