s407254-mlworkshops/metryki.py
2019-05-16 13:50:58 +02:00

49 lines
1.1 KiB
Python

import pandas as pd
from jiwer import wer
import numpy as np
# Score each (ground_truth, hypothesis) pair of the results TSV with jiwer's
# word error rate (WER), then write three outputs: the table with a WER
# column, the mean WER, and the fraction of rows whose WER is exactly 0.

# The input has no header row, so column names are supplied here.
# keep_default_na=False keeps empty cells as '' instead of NaN, and
# skip_blank_lines=False stops pandas from silently dropping blank lines.
df = pd.read_csv(
    './Infra/wikiniews_results.tsv',
    sep='\t',
    index_col=False,
    header=None,
    keep_default_na=False,
    skip_blank_lines=False,
    names=['train', 'ground_truth', 'hypothesis', 'link'],
)

# Placeholder column so the preview below shows WER (as NaN) next to the
# loaded columns.
df['WER'] = np.nan

# Preview the first rows of every column, in column order.
for column in df.columns:
    print(df[column].head())

# One wer() call per row. Iterating the two columns directly avoids the
# per-row Series construction of DataFrame.iterrows() and the per-cell
# df.loc writes; the whole column is assigned once.
wer_scores = [
    wer(ground_truth, hypothesis)
    for ground_truth, hypothesis in zip(df['ground_truth'], df['hypothesis'])
]
df['WER'] = np.array(wer_scores, dtype=float)

# A row counts as correct only when its WER is exactly 0.
correct_answers = int((df['WER'] == 0.0).sum())

print(df['WER'])
print(correct_answers)

error_wer_mean = df['WER'].mean()
srr = correct_answers / len(df)

# save to file
# header=None omits the column names; the row index is still written as the
# first column (pandas default index=True).
df.to_csv('./wikiniews_results_with_wer.tsv', sep='\t', header=None)

with open("wer_mean.txt", "w", encoding="utf-8") as mean_file:
    mean_file.write(str(error_wer_mean))

with open("srr.txt", "w", encoding="utf-8") as srr_file:
    srr_file.write(str(srr))