s441471-mlworkshops/calc_wer.py

53 lines
1008 B
Python
Raw Permalink Normal View History

2019-04-26 14:23:37 +02:00
import sys
import pandas as pd
# Per-line counts, one entry appended for every "Scores" line read from stdin.
correct = []
insertion = []
deletion = []
substitution = []

# Word error rate of each "Scores" line, in input order.
all_wers = []

# Count of "Scores" lines with zero substitutions, deletions and insertions;
# later divided by the number of scored lines to give a rate.
srr = 0
def wer(s, d, c, i):
    """Return the word error rate for one set of alignment counts.

    The rate is ``(s + d + i) / (s + d + c)``: the number of errors divided
    by the number of reference words.

    Args:
        s: Number of substitutions.
        d: Number of deletions.
        c: Number of correct words.
        i: Number of insertions.

    Returns:
        The error rate as a float. When the reference is empty
        (``s + d + c == 0``) the ratio is undefined; ``0.0`` is returned if
        there are no insertions either, otherwise ``float("inf")``.
    """
    errors = s + d + i
    reference_length = s + d + c
    if reference_length == 0:
        # An empty reference would otherwise raise ZeroDivisionError and
        # abort the whole run.
        return 0.0 if errors == 0 else float("inf")
    return errors / reference_length
# Walk the alignment report on stdin; only lines beginning with "Scores"
# carry counts, every other line is skipped.
for line in sys.stdin:
    if not line.startswith('Scores'):
        continue

    # Fields from index 5 onward are read as the correct, substitution,
    # deletion and insertion counts, in that order.
    counts = line.split(' ')[5:]
    hits = int(counts[0])
    subs = int(counts[1])
    dels = int(counts[2])
    ins = int(counts[3])

    # A line without any error counts towards the error-free tally.
    if not (dels or subs or ins):
        srr += 1

    correct.append(hits)
    substitution.append(subs)
    deletion.append(dels)
    insertion.append(ins)
    all_wers.append(wer(subs, dels, hits, ins))

# The overall rate is computed from the summed counts rather than by
# averaging the per-line rates.
all_c, all_s, all_d, all_i = (
    sum(column) for column in (correct, substitution, deletion, insertion)
)
total_wer = wer(all_s, all_d, all_c, all_i)
print(total_wer)
2019-04-26 14:46:17 +02:00
# Load the existing results table: tab-separated, read without a header row.
data = pd.read_csv(
    'wikiniews_results.tsv',
    sep='\t',
    header=None,
)
2019-04-26 14:23:37 +02:00
# Wrap the loaded table and add the per-line WER values as a new column at
# position 4, under an empty column name.
df = pd.DataFrame(data)
df.insert(loc=4, column='', value=all_wers)

# Overwrite the input file with the extended table.
# NOTE(review): to_csv is called without index/header arguments, so pandas
# also writes an index column and a header row, although the file was read
# with header=None -- confirm this change of layout is intended.
df.to_csv('wikiniews_results.tsv', sep='\t')
# Turn the tally of error-free lines into a fraction of all scored lines.
srr /= len(all_wers)

# Store both summary figures, space-separated, with no trailing newline.
summary = "{} {}".format(srr, total_wer)
with open('wer_srr.txt', 'w+') as out_file:
    out_file.write(summary)