From d490ada5aff7ed811e4f3e7ddefad3b8d63f221a Mon Sep 17 00:00:00 2001
From: laskau <laskau@users.noreply.github.com>
Date: Thu, 16 May 2019 13:50:58 +0200
Subject: [PATCH] zad3

---
 Dockerfile       | 10 +++++++++-
 Jenkinsfile      | 10 ++++++++++
 metryki.py       | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 metryki.sh       |  2 +-
 requirements.txt |  3 +++
 5 files changed, 71 insertions(+), 2 deletions(-)
 create mode 100644 metryki.py
 create mode 100644 requirements.txt

diff --git a/Dockerfile b/Dockerfile
index 9752dd3..b2f2c1c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,4 +9,12 @@ RUN apt install -y build-essential
 RUN git clone https://github.com/usnistgov/SCTK.git
 WORKDIR SCTK
 RUN make config && make all && make check &&  make install && make doc
-ENV PATH=$PATH:/SCTK/bin
\ No newline at end of file
+ENV PATH=$PATH:/SCTK/bin
+
+# NOTE(review): this second FROM starts a new stage, so the SCTK build above is absent from the final image unless copied with COPY --from; confirm intended.
+FROM python:3.6.1
+ENV PYTHONUNBUFFERED=1
+WORKDIR /code
+COPY requirements.txt /code/
+RUN pip3 install --no-cache-dir -r requirements.txt
+COPY . /code/
diff --git a/Jenkinsfile b/Jenkinsfile
index 3f07341..6f51f98 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -21,6 +21,7 @@ pipeline {
 
          stage('Run script') {
              steps{
+                 sh 'chmod 755 ./script.sh'
                  sh 'sh script.sh text.txt out.txt'
                  archiveArtifacts artifacts: 'out.txt'
              }
@@ -32,7 +33,16 @@ pipeline {
             }
         }
 
+         stage('WER & SRR') {
+             steps {
+                 sh 'chmod 755 ./metryki.sh'
+                 sh 'sh metryki.sh'
+                 archiveArtifacts artifacts: 'wikiniews_results_with_wer.tsv'
+                 archiveArtifacts artifacts: 'wer_mean.txt'
+                 archiveArtifacts artifacts: 'srr.txt'
 
+             }
+         }
 
     }
 }
diff --git a/metryki.py b/metryki.py
new file mode 100644
index 0000000..93a399d
--- /dev/null
+++ b/metryki.py
@@ -0,0 +1,48 @@
+"""Score the hypotheses in ./Infra/wikiniews_results.tsv against references.
+
+Writes the input rows extended with a per-row WER column, the mean WER, and
+the SRR (share of rows whose WER is exactly 0).
+"""
+
+import pandas as pd
+from jiwer import wer
+import numpy as np
+
+COLUMNS = ['train', 'ground_truth', 'hypothesis', 'link']
+
+df = pd.read_csv('./Infra/wikiniews_results.tsv',
+                 sep='\t',
+                 index_col=False,
+                 header=None,
+                 keep_default_na=False,
+                 skip_blank_lines=False,
+                 names=COLUMNS)
+
+# Preview every parsed column so a mis-split TSV shows up in the build log.
+for column in COLUMNS:
+    print(df[column].head())
+
+# WER of each hypothesis against its ground truth, computed row by row.
+# dtype is pinned so an empty input still produces a float column.
+pairs = zip(df['ground_truth'], df['hypothesis'])
+df['WER'] = np.array([wer(ref, hyp) for ref, hyp in pairs], dtype=float)
+
+# A row counts as a correct answer only when its WER is exactly zero.
+correct_answers = int((df['WER'] == 0.0).sum())
+
+print(df['WER'])
+print(correct_answers)
+
+error_wer_mean = df['WER'].mean()
+
+# For an empty input the SRR is undefined: report NaN (as the mean does)
+# instead of raising ZeroDivisionError.
+srr = correct_answers / len(df) if len(df) else float('nan')
+
+# save to file
+df.to_csv('./wikiniews_results_with_wer.tsv', sep='\t', header=None)
+
+with open("wer_mean.txt", "w") as out:
+    out.write(str(error_wer_mean))
+with open("srr.txt", "w") as out:
+    out.write(str(srr))
diff --git a/metryki.sh b/metryki.sh
index adc5e59..aa32b68 100755
--- a/metryki.sh
+++ b/metryki.sh
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
 
-awk '{print gensub("^\\*\\*", "\\\\**", "g", $0) "\t(sp1_"NR")"}' < $1 > $2
+python3 metryki.py
 
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b3bdd20
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+jiwer
+numpy
+pandas