zad3

2019-05-16 13:50:58 +02:00 · 2019-05-16 13:50:58 +02:00 · d490ada5af
commit d490ada5af
parent 36a5dad2e7
5 changed files with 71 additions and 2 deletions
--- a/10
+++ b/10
@@ -9,4 +9,12 @@ RUN apt install -y build-essential
 RUN git clone https://github.com/usnistgov/SCTK.git
 WORKDIR SCTK
 RUN make config && make all && make check &&  make install && make doc
-ENV PATH=$PATH:/SCTK/bin
+ENV PATH=$PATH:/SCTK/bin
+
+# NOTE(review): this FROM starts a new stage; the SCTK build above is not in the final image - confirm.
+FROM python:3.6.1
+ENV PYTHONUNBUFFERED=1
+WORKDIR /code
+COPY requirements.txt /code/
+RUN pip3 install --no-cache-dir -r requirements.txt
+COPY . /code/
--- a/10
+++ b/10
@@ -21,6 +21,7 @@ pipeline {

         stage('Run script') {
             steps{
+                 sh 'chmod 755 ./script.sh'
                 sh 'sh script.sh text.txt out.txt'
                 archiveArtifacts artifacts: 'out.txt'
             }
@@ -32,7 +33,16 @@ pipeline {
            }
        }

+         stage('WER & SRR') {
+            steps {
+                 // metryki.sh is passed to sh as an argument, so it needs no executable bit.
+                 sh 'sh metryki.sh'
+                 archiveArtifacts artifacts: 'wikiniews_results_with_wer.tsv'
+                 archiveArtifacts artifacts: 'wer_mean.txt'
+                 archiveArtifacts artifacts: 'srr.txt'

+            }
+        }

    }
 }
--- a/metryki.py
+++ b/metryki.py
@@ -0,0 +1,48 @@
+import pandas as pd
+from jiwer import wer
+import numpy as np
+
+# Score every hypothesis against its ground truth with WER, then save the per-row
+# scores, the mean WER and the share of rows with WER == 0 (stored as SRR).
+df = pd.read_csv('./Infra/wikiniews_results.tsv',
+                 sep='\t',
+                 index_col=False,
+                 header=None,
+                 keep_default_na=False,
+                 skip_blank_lines=False,
+                 names=['train', 'ground_truth', 'hypothesis', 'link'])
+
+# Placeholder so the WER column already exists in the preview printed below.
+df['WER'] = np.nan
+
+print(df['train'].head())
+print(df['ground_truth'].head())
+print(df['hypothesis'].head())
+print(df['link'].head())
+print(df['WER'].head())
+
+# Build the whole column at once instead of writing one cell per iterrows() step.
+df['WER'] = pd.Series(
+    [wer(truth, guess) for truth, guess in zip(df['ground_truth'], df['hypothesis'])],
+    index=df.index,
+    dtype=float)
+
+# A row counts as correct only when its WER is exactly zero.
+correct_answers = int((df['WER'] == 0.0).sum())
+
+print(df['WER'])
+print(correct_answers)
+
+error_wer_mean = df['WER'].mean()
+# An empty input has no rate; report nan instead of dividing by zero.
+srr = correct_answers / len(df) if len(df) else float('nan')
+
+# save to file
+# header=None leaves out the column names; the row index is still written first.
+df.to_csv('./wikiniews_results_with_wer.tsv', sep='\t', header=None)
+
+with open("wer_mean.txt", "w") as out:
+    # Reuse the mean computed above rather than recomputing it.
+    out.write(str(error_wer_mean))
+with open("srr.txt", "w") as out:
+    out.write(str(srr))
--- a/metryki.sh
+++ b/metryki.sh
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash

-awk '{print gensub("^\\*\\*", "\\\\**", "g", $0) "\t(sp1_"NR")"}' < $1 > $2
+python3 metryki.py  # computes WER and SRR and writes the result files

--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+jiwer
+pandas
+numpy