s421818-mlworkshops/createWERandSRRFiles.py

58 lines
1.3 KiB
Python
Raw Permalink Normal View History

2020-04-03 14:21:13 +02:00
import re
2020-04-03 15:33:21 +02:00
import csv
2020-04-03 14:21:13 +02:00
2020-04-03 15:52:15 +02:00
# Text that precedes the four counts on a "Scores:" line of helper.txt.
_SCORES_PREFIX = "Scores: (#C #S #D #I) "


def _percent_correct(score_line):
    """Return 100 * C / (C + S + D + I) for one "Scores:" line.

    Args:
        score_line: A line such as ``Scores: (#C #S #D #I) 7 1 0 2``.
            Only the first four whitespace-separated counts are used.

    Returns:
        The first count (#C) as a percentage of the sum of the four counts.

    Raises:
        ValueError: If the line holds fewer than four counts, a count is not
            an integer, or the four counts add up to zero.
    """
    fields = score_line.replace(_SCORES_PREFIX, "").split()
    if len(fields) < 4:
        raise ValueError("expected four counts in line: " + score_line.strip())
    correct, substituted, deleted, inserted = (int(field) for field in fields[:4])
    total = correct + substituted + deleted + inserted
    if total == 0:
        # The percentage is undefined here; fail with the offending line
        # instead of a bare ZeroDivisionError.
        raise ValueError("all four counts are zero in line: " + score_line.strip())
    return float(correct) / float(total) * 100


globalPercent = 0  # running sum of the per-line percentages
counter = 0        # number of "Scores:" lines seen
ssrCounter = 0     # number of lines whose percentage is exactly 100
# One percentage per "Scores:" line, in file order.  Despite the name, each
# entry is the share of #C in the four counts, not an error rate.
werArray = []

with open("helper.txt", "r") as f:
    for line in f:
        if not line.startswith("Scores:"):
            continue
        percent = _percent_correct(line)
        if percent == 100:
            ssrCounter += 1
        globalPercent += percent
        counter += 1
        werArray.append(percent)

if counter == 0:
    raise ValueError("helper.txt contains no 'Scores:' lines")

# The output files are opened only after the input parsed cleanly, so a bad
# input no longer leaves freshly created or half-written result files behind.
with open("wer.txt", "a") as wer:
    # Arithmetic kept exactly as before so the written digits do not change.
    wer.write("AVG: " + str(float((globalPercent) / (float(counter) * 100) * 100)) + "%\n\n\n\n")

with open("ssr.txt", "a") as ssr:
    ssr.write(str(float(ssrCounter) / float(counter) * 100) + "%\n")
2020-04-03 15:37:44 +02:00
2020-04-03 15:33:21 +02:00
# Append the per-line percentages in werArray as an extra column ("Col5") to
# wikiniews_results.tsv and write the result to
# wikiniews_resultsWithAdditionalColumn.tsv.
#
# NOTE(review): both files are named .tsv but are parsed and written with a
# single space as the delimiter -- confirm this is intended and not a tab
# character that was lost somewhere along the way.

# Read and validate the whole input first, so the output file is not
# truncated when the input turns out to be unusable.
with open('wikiniews_results.tsv', 'r', newline='') as f_in:
    reader = csv.reader(f_in, delimiter=' ')
    header = next(reader, None)  # None instead of StopIteration on an empty file
    rows = list(reader)

if header is None:
    raise ValueError("wikiniews_results.tsv is empty: no header row to extend")
if len(rows) > len(werArray):
    # Previously this surfaced as an IndexError in the middle of the loop.
    raise ValueError(
        "wikiniews_results.tsv has %d data rows but only %d scores are available"
        % (len(rows), len(werArray))
    )

header.append('Col5')
# Rows and scores are paired by position; surplus scores are ignored, as before.
for row, score in zip(rows, werArray):
    row.append(score)

with open('wikiniews_resultsWithAdditionalColumn.tsv', 'w', newline='') as f_out:
    writer = csv.writer(f_out, delimiter=' ', lineterminator='\n')
    writer.writerow(header)
    writer.writerows(rows)