From d490ada5aff7ed811e4f3e7ddefad3b8d63f221a Mon Sep 17 00:00:00 2001
From: laskau
Date: Thu, 16 May 2019 13:50:58 +0200
Subject: [PATCH] zad3

Add a Jenkins stage that computes WER and SRR for the results stored in
Infra/wikiniews_results.tsv:

- metryki.py scores every row with jiwer and writes
  wikiniews_results_with_wer.tsv, wer_mean.txt and srr.txt
- metryki.sh runs metryki.py
- Dockerfile gains a python:3.6.1 stage that installs requirements.txt
- Jenkinsfile archives the three result files

NOTE(review): the second FROM starts a new build stage, so the final
image is based on python:3.6.1 and does not carry over the SCTK build or
PATH from the first stage - confirm this is intended.

NOTE(review): indentation and blank context lines in the Jenkinsfile and
metryki.sh hunks were reconstructed from a whitespace-flattened copy of
this patch; re-diff against the target tree if it does not apply. The
blob ids on the metryki.py and metryki.sh index lines predate the review
edits.
---
 Dockerfile       | 10 +++++++++-
 Jenkinsfile      | 10 ++++++++++
 metryki.py       | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 metryki.sh       |  2 +-
 requirements.txt |  3 +++
 5 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 metryki.py
 create mode 100644 requirements.txt

diff --git a/Dockerfile b/Dockerfile
index 9752dd3..b2f2c1c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,4 +9,12 @@ RUN apt install -y build-essential
 RUN git clone https://github.com/usnistgov/SCTK.git
 WORKDIR SCTK
 RUN make config && make all && make check && make install && make doc
-ENV PATH=$PATH:/SCTK/bin
\ No newline at end of file
+ENV PATH=$PATH:/SCTK/bin
+
+FROM python:3.6.1
+ENV PYTHONUNBUFFERED 1
+RUN mkdir /code
+WORKDIR /code
+ADD requirements.txt /code/
+RUN pip3 install -r requirements.txt
+ADD . /code/
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 3f07341..6f51f98 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -21,6 +21,7 @@ pipeline {
 
         stage('Run script') {
             steps{
+                sh 'chmod 755 ./script.sh'
                 sh 'sh script.sh text.txt out.txt'
                 archiveArtifacts artifacts: 'out.txt'
             }
@@ -32,7 +33,16 @@ pipeline {
             }
         }
 
+        stage('WER & SRR') {
+            steps {
+                sh 'chmod 755 ./metryki.sh'
+                sh 'sh metryki.sh'
 
+                archiveArtifacts artifacts: 'wikiniews_results_with_wer.tsv'
+                archiveArtifacts artifacts: 'wer_mean.txt'
+                archiveArtifacts artifacts: 'srr.txt'
+            }
+        }
     }
 }
 
diff --git a/metryki.py b/metryki.py
new file mode 100644
index 0000000..93a399d
--- /dev/null
+++ b/metryki.py
@@ -0,0 +1,52 @@
+"""Compute WER and SRR for the wikinews recognition results.
+
+Reads ./Infra/wikiniews_results.tsv and writes three files to the working
+directory: wikiniews_results_with_wer.tsv (input rows plus a WER column),
+wer_mean.txt (mean WER) and srr.txt (share of rows whose WER is 0).
+"""
+import pandas as pd
+from jiwer import wer
+
+INPUT_PATH = './Infra/wikiniews_results.tsv'
+COLUMNS = ['train', 'ground_truth', 'hypothesis', 'link']
+
+
+def main():
+    """Score every row, print a short summary and write the result files."""
+    df = pd.read_csv(INPUT_PATH,
+                     sep='\t',
+                     index_col=False,
+                     header=None,
+                     keep_default_na=False,
+                     skip_blank_lines=False,
+                     names=COLUMNS)
+
+    # One WER per (reference, hypothesis) pair. The explicit float dtype
+    # keeps the column numeric even when the input has no rows.
+    scores = [wer(ref, hyp)
+              for ref, hyp in zip(df['ground_truth'], df['hypothesis'])]
+    df['WER'] = pd.Series(scores, index=df.index, dtype=float)
+
+    # SRR is the share of rows with no word errors. The mean of the boolean
+    # mask is NaN for an empty input, where correct / len(df) would raise
+    # ZeroDivisionError.
+    exact = df['WER'] == 0.0
+    wer_mean = float(df['WER'].mean())
+    srr = float(exact.mean())
+
+    print(df.head())
+    print(f'rows: {len(df)}, exact matches: {int(exact.sum())}')
+    print(f'mean WER: {wer_mean}, SRR: {srr}')
+
+    # header=None keeps the output headerless like the input; the row index
+    # is still written as the first column.
+    df.to_csv('./wikiniews_results_with_wer.tsv', sep='\t', header=None)
+
+    with open('wer_mean.txt', 'w') as out:
+        out.write(str(wer_mean))
+    with open('srr.txt', 'w') as out:
+        out.write(str(srr))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/metryki.sh b/metryki.sh
index adc5e59..aa32b68 100755
--- a/metryki.sh
+++ b/metryki.sh
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
 
-awk '{print gensub("^\\*\\*", "\\\\**", "g", $0) "\t(sp1_"NR")"}' < $1 > $2
+python3 metryki.py
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b3bdd20
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+jiwer
+pandas
+numpy
\ No newline at end of file