# s407254-mlworkshops/metryki.py

import pandas as pd
from jiwer import wer
import numpy as np

# Number of rows whose WER turns out to be exactly zero; filled in below.
correct_answers = 0

# The results file is a header-less TSV, so column names are supplied here.
# keep_default_na=False leaves empty fields as empty strings instead of NaN,
# and skip_blank_lines=False keeps blank lines as rows rather than dropping
# them.
df = pd.read_csv(
    './Infra/wikiniews_results.tsv',
    sep='\t',
    header=None,
    names=['train', 'ground_truth', 'hypothesis', 'link'],
    index_col=False,
    keep_default_na=False,
    skip_blank_lines=False,
)

# Placeholder column for the per-row word error rate.
df['WER'] = np.nan

# Preview the first rows of every column, including the new WER placeholder.
for column in df.columns:
    print(df[column].head())

# Score every hypothesis against its reference transcript.  The scores are
# collected first and assigned as a whole column afterwards: pandas advises
# against modifying a DataFrame while iterating over it with iterrows(), and
# per-row .loc writes are needlessly slow.
wer_scores = [
    wer(ground_truth, hypothesis)
    for ground_truth, hypothesis in zip(df['ground_truth'], df['hypothesis'])
]
# Explicit float dtype keeps the column numeric even when there are no rows.
df['WER'] = pd.Series(wer_scores, index=df.index, dtype=float)

# Rows with a WER of exactly 0.0 are counted as correct answers.
correct_answers = int((df['WER'] == 0.0).sum())


print(df['WER'])
print(correct_answers)


# Corpus-level metrics: the mean WER over all rows, and srr, the fraction of
# rows counted as correct (presumably "sentence recognition rate" -- confirm).
error_wer_mean = df['WER'].mean()
# With no rows the division would raise ZeroDivisionError; report NaN instead,
# which is also what the mean of an empty column evaluates to.
srr = correct_answers / len(df) if len(df) else float('nan')


# save to file
# header=None suppresses the header row; the row index is still written as
# the first column.
df.to_csv('./wikiniews_results_with_wer.tsv', sep='\t', header=None)

with open("wer_mean.txt", "w") as file:
    file.write(str(error_wer_mean))
with open("srr.txt", "w") as file:
    file.write(str(srr))
# No results found.