Python2019/labs03/parse_results.py

81 lines
2.0 KiB
Python
Raw Normal View History

2019-02-09 08:54:14 +01:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Parse MTurk output file.
"""
import sys
import collections
import csv
import glob
import numpy as np
def map_formality(text: str):
    """Translate a formality label into its signed integer score.

    The scale runs from 3 ("Very Formal") through 0 ("Neutral") down to
    -3 ("Very Informal").  A label that is not on the scale yields ``None``.
    """
    scale = {
        "Very Formal": 3,
        "Formal": 2,
        "Somewhat Formal": 1,
        "Neutral": 0,
        "Somewhat Informal": -1,
        "Informal": -2,
        "Very Informal": -3,
    }
    # dict.get falls back to None for labels that are not on the scale.
    return scale.get(text)
def mapping(text: str):
    """Convert a rating label into its numeric score.

    Scores run from 5 ('Perfect') down to 1 ('Other').  Labels that are
    not listed produce ``None``.
    """
    scores = {
        'Perfect': 5,
        'Comprehensible': 4,
        'Somewhat Comprehensible': 3,
        'Incomprehensible': 2,
        'Other': 1,
    }
    return scores.get(text)
def encode_quatation_marks(text):
    """Replace quote characters in *text* with entity references.

    Every apostrophe becomes ``&apos;`` and every double quote becomes
    ``&quot;``; all other characters are returned unchanged.
    """
    # NOTE(review): the replacement literals had been reduced to bare quote
    # characters (an unterminated ''' and a no-op '"' -> '"').  They are
    # restored here as the &apos; / &quot; entities -- confirm this is the
    # escaping used for the sentences in the MTurk results file.
    return text.replace("'", "&apos;").replace('"', "&quot;")
def read_files_from_dir(dir_name):
    """Load every ``formality.test.*`` file found under *dir_name*.

    Returns a ``defaultdict(list)`` keyed by the matched path; each value
    holds that file's lines, whitespace-stripped and passed through
    ``encode_quatation_marks``.  A file without any lines gets no entry.
    """
    collected = collections.defaultdict(list)
    for path in glob.glob(f"./{dir_name}/formality.test.*"):
        with open(path) as handle:
            encoded = [encode_quatation_marks(raw.strip()) for raw in handle]
        # Only touch the mapping when there is content, so that empty
        # files do not create a key.
        if encoded:
            collected[path].extend(encoded)
    return collected
def main():
    """Print the mean formality score for each ``formality.test.*`` file.

    Command-line arguments:
        sys.argv[1]: path of the MTurk results CSV.  Each row is read for
            the columns ``Input.sentence_1`` .. ``Input.sentence_5`` and
            ``Answer.sentence_1_choice`` .. ``Answer.sentence_5_choice``.
        sys.argv[2]: directory handed to ``read_files_from_dir``.

    For every file, one line is printed with the file name and the mean of
    its sentences' scores; sentences without any usable rating are ignored
    in that mean.
    """
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} RESULT_FILE DIR_NAME")
    result_file, dir_name = sys.argv[1], sys.argv[2]

    # Collect every rating given to each sentence across all CSV rows.
    ratings = collections.defaultdict(list)
    # newline="" lets the csv module handle line breaks inside quoted fields.
    with open(result_file, newline="") as results:
        for row in csv.DictReader(results):
            for i in range(1, 6):
                # The comprehensibility variant of this script used
                # mapping(row[f'Answer.Q{i}Answer']) here instead.
                ratings[row[f'Input.sentence_{i}']].append(
                    map_formality(row[f'Answer.sentence_{i}_choice']))

    # Average per sentence.  Only unmapped answers (None) are discarded;
    # a plain truthiness test would also throw away "Neutral", whose score
    # is 0, and skew every mean away from zero.
    sent_scores = {}
    for sent, votes in ratings.items():
        valid = [vote for vote in votes if vote is not None]
        sent_scores[sent] = np.mean(valid) if valid else np.nan

    for filename, sents in read_files_from_dir(dir_name).items():
        # Sentences that never appeared in the results count as NaN so
        # that nanmean skips them instead of receiving an empty list.
        print(filename,
              np.nanmean([sent_scores.get(sent, np.nan) for sent in sents]))
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()