s404686-mlworkshops/calc_wer.py

41 lines
1.0 KiB
Python

import sys
import pandas as pd
# Tab-separated results table that is read and rewritten further down.
FILE_NAME = 'wikiniews_results.tsv'

# Per-sentence edit counts taken from the "Scores:" lines, one entry per line.
correct, substitution, deletions, insertions = [], [], [], []

# Per-sentence word error rates, in the order the sentences were read.
wer_list = []

# How many sentences had zero substitutions, deletions and insertions.
correct_sentences = 0
def wer(c, s, d, i):
    """Return the word error rate for a set of edit counts.

    Args:
        c: number of correct words.
        s: number of substitutions.
        d: number of deletions.
        i: number of insertions.

    Returns:
        ``(s + d + i) / (s + d + c)``. When the denominator is zero the
        ratio is undefined; in that case 0.0 is returned if there are no
        errors at all, otherwise ``float('inf')``.
    """
    errors = s + d + i
    denominator = s + d + c
    if denominator == 0:
        # An empty reference would otherwise raise ZeroDivisionError.
        return 0.0 if errors == 0 else float('inf')
    return errors / denominator
# Collect the edit counts from every "Scores:" line arriving on stdin.
for line in sys.stdin:
    if not line.startswith('Scores:'):
        continue
    # The integer counts start at the sixth space-separated field; the first
    # four are used as correct / substitutions / deletions / insertions.
    counts = list(map(int, line.strip().split(' ')[5:]))
    n_correct, n_sub, n_del, n_ins = counts[0], counts[1], counts[2], counts[3]
    correct.append(n_correct)
    substitution.append(n_sub)
    deletions.append(n_del)
    insertions.append(n_ins)
    wer_list.append(wer(n_correct, n_sub, n_del, n_ins))
    if n_sub == 0 and n_del == 0 and n_ins == 0:
        correct_sentences += 1

# Without any scored sentence the ratios below are undefined; stop with a
# clear message instead of an unexplained ZeroDivisionError.
if not wer_list:
    sys.exit("calc_wer: no 'Scores:' lines found on stdin")

# Share of sentences with no errors, and the WER over the summed counts.
srr = correct_sentences / len(wer_list)
wer_all = wer(sum(correct), sum(substitution), sum(deletions), sum(insertions))

# read_csv already returns a DataFrame, so no extra wrapping is needed.
df = pd.read_csv(FILE_NAME, sep='\t', header=None)
# Both numbers must match for the column insertion below to succeed.
print(len(wer_list), len(df))
df.insert(4, '', wer_list)
# NOTE(review): with default arguments to_csv also writes the index column
# and a header row, although the file was read with header=None - confirm
# that consumers of this file expect that layout.
df.to_csv(FILE_NAME, sep='\t')

with open('wer_srr_results.txt', 'w') as f:
    f.write("{}\t{}".format(srr, wer_all))