#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Parse MTurk output file.

Usage: <script> RESULT_CSV DIR_NAME

Reads per-sentence formality ratings from RESULT_CSV, averages them per
sentence, and prints one mean score for every ``formality.test.*`` file found
in ``./DIR_NAME/``.
"""

import sys
import collections
import csv
import glob
from typing import Optional

import numpy as np

# Numeric value assigned to each formality label offered to the annotators.
FORMALITY_SCORES = {
    "Very Formal": 3,
    "Formal": 2,
    "Somewhat Formal": 1,
    "Neutral": 0,
    "Somewhat Informal": -1,
    "Informal": -2,
    "Very Informal": -3,
}

# Numeric value assigned to each label handled by mapping().
QUALITY_SCORES = {
    "Perfect": 5,
    "Comprehensible": 4,
    "Somewhat Comprehensible": 3,
    "Incomprehensible": 2,
    "Other": 1,
}

# NOTE(review): the entity literals in the original source were lost (the
# text had been HTML-decoded, leaving a no-op / invalid replace call).
# "&#39;" and "&quot;" are the presumed originals -- confirm that they match
# the encoding used in the 'Input.sentence_*' columns of the MTurk CSV.
APOSTROPHE_ENTITY = "&#39;"
DOUBLE_QUOTE_ENTITY = "&quot;"

# Each CSV row carries columns for sentences 1..SENTENCES_PER_HIT.
SENTENCES_PER_HIT = 5


def map_formality(text: str) -> Optional[int]:
    """Return the score (-3..3) for a formality label, or None if unknown."""
    return FORMALITY_SCORES.get(text)


def mapping(text: str) -> Optional[int]:
    """Return the score (1..5) for a rating label, or None if unknown.

    Not called by main(); kept for callers that score these labels.
    """
    return QUALITY_SCORES.get(text)


def encode_quatation_marks(text):
    """Replace single and double quote characters with HTML entities."""
    return (text.replace("'", APOSTROPHE_ENTITY)
                .replace('"', DOUBLE_QUOTE_ENTITY))


def read_files_from_dir(dir_name):
    """Read every ``./<dir_name>/formality.test.*`` file.

    Returns a mapping from file name to the list of its lines, each stripped
    of surrounding whitespace and passed through encode_quatation_marks().
    """
    data = collections.defaultdict(list)
    # sorted() makes the processing (and therefore printing) order stable.
    for fname in sorted(glob.glob(f"./{dir_name}/formality.test.*")):
        with open(fname, encoding="utf-8") as ff:
            for line in ff:
                data[fname].append(encode_quatation_marks(line.strip()))
    return data


def _mean_sentence_scores(result_file):
    """Return {sentence: mean formality score} computed from the MTurk CSV.

    Answers that map_formality() does not recognise are ignored. A sentence
    with no recognised answer gets NaN.
    """
    ratings = collections.defaultdict(list)
    # newline="" is what the csv module requires for correct quoted-newline
    # handling.
    with open(result_file, newline="", encoding="utf-8") as results:
        for row in csv.DictReader(results):
            for i in range(1, SENTENCES_PER_HIT + 1):
                sentence = row[f'Input.sentence_{i}']
                ratings[sentence].append(
                    map_formality(row[f'Answer.sentence_{i}_choice']))

    means = {}
    for sentence, scores in ratings.items():
        # Compare against None explicitly: "Neutral" maps to 0, which is a
        # valid rating and must not be dropped by a truthiness test.
        valid = [score for score in scores if score is not None]
        means[sentence] = np.mean(valid) if valid else np.nan
    return means


def main():
    """Print the mean formality score of each file in the given directory."""
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} RESULT_CSV DIR_NAME")
    result_file = sys.argv[1]
    dir_name = sys.argv[2]

    sent_scores = _mean_sentence_scores(result_file)

    for filename, sents in read_files_from_dir(dir_name).items():
        # Sentences that were never rated count as NaN so nanmean skips them.
        per_sentence = [sent_scores.get(sent, np.nan) for sent in sents]
        print(filename, np.nanmean(per_sentence))


if __name__ == "__main__":
    main()