#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Parse MTurk formality-annotation results.

Reads an MTurk batch results CSV (first argument) and a directory of
formality.test.* files (second argument), then prints the mean formality
score of the sentences in each file.
"""

import sys
import collections
import csv
import glob

import numpy as np


def map_formality(text: str):
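    """Map an MTurk formality label to an integer score in [-3, 3]; unknown labels map to None."""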
    values = {"Very Formal": 3,
              "Formal": 2,
              "Somewhat Formal": 1,
              "Neutral": 0,
              "Somewhat Informal": -1,
              "Informal": -2,
              "Very Informal": -3}
    if text in values:
        return values[text]
    else:
        return None


def mapping(text: str):
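    """Map an MTurk comprehensibility label to a 1-5 score; unknown labels fall through to None."""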
    if text == 'Perfect':
        return 5
    elif text == 'Comprehensible':
        return 4
    elif text == 'Somewhat Comprehensible':
        return 3
    elif text == 'Incomprehensible':
        return 2
    elif text == 'Other':
        return 1


def encode_quatation_marks(text):
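    # Re-encode plain quotes as HTML entities (assumed: &#39; and &quot;) so the
    # file's sentences match the HTML-escaped sentences in the MTurk results CSV.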
    return text.replace('\'', '&#39;').replace('"', '&quot;')


def read_files_from_dir(dir_name):
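    """Read every ./<dir_name>/formality.test.* file into a dict of stripped, quote-encoded lines keyed by filename."""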
    data = collections.defaultdict(list)

    for fname in glob.glob(f"./{dir_name}/formality.test.*"):
        with open(fname) as ff:
            for line in ff:
                line = line.strip()
                data[fname].append(encode_quatation_marks(line))
    return data


def main():
    """Aggregate the MTurk judgements and print the mean formality score of each test file."""
    result_file = sys.argv[1]
    dir_name = sys.argv[2]

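    # Each results row covers sentences 1-5; collect every worker's formality score per sentence.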
    sent_scores = collections.defaultdict(list)
    with open(result_file) as results:
        mturk_data = csv.DictReader(results)
        for row in mturk_data:
            for i in range(1, 6):
                # sent_scores[row[f'Input.sentence_{i}']].append(mapping(row[f'Answer.Q{i}Answer']))
                sent_scores[row[f'Input.sentence_{i}']].append(map_formality(row[f'Answer.sentence_{i}_choice']))
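    # Average each sentence's scores, filtering on None (not truthiness) so "Neutral" (0) is not dropped.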
    for sent in sent_scores:
        sent_scores[sent] = np.mean([i for i in sent_scores[sent] if i is not None])

    for filename, sents in read_files_from_dir(dir_name).items():
        # print([sent_scores[sent] for sent in sents])
        print(filename, np.nanmean([sent_scores[sent] for sent in sents]))


if __name__ == "__main__":
    main()