challenging-america-word-ga.../join.py
2021-06-19 13:25:42 +02:00

17 lines
661 B
Python

def process(f_in_path, f_exp_path, f_whole_path):
    """Merge context rows and expected words into one raw-text corpus file.

    Each line of ``f_in_path`` must hold exactly four tab-separated fields;
    the first two are ignored, the third is the left context and the fourth
    is the right context. The line with the same number in ``f_exp_path``
    supplies the word that goes between the two contexts.

    For every pair of lines the text ``left + ' ' + middle + ' ' + right``
    is written to ``f_whole_path``, with each escaped line break (a
    backslash followed by the letter n) turned into a real newline, and
    followed by one blank line.

    Args:
        f_in_path: Path of the tab-separated context file to read.
        f_exp_path: Path of the file with one expected word per line.
        f_whole_path: Path of the output file; it is overwritten.

    Raises:
        ValueError: If a context row does not have four fields, or if the
            two input files do not have the same number of lines.
    """
    # The encoding is explicit so the result does not depend on the
    # platform's default locale.
    with open(f_in_path, encoding='utf-8') as f_in, \
            open(f_exp_path, encoding='utf-8') as f_exp, \
            open(f_whole_path, 'w', encoding='utf-8') as f_whole:
        for line_no, line_in in enumerate(f_in, start=1):
            line_exp = f_exp.readline()
            if not line_exp:
                # An empty string means end of file (a blank line is '\n').
                raise ValueError(
                    '{}: no line {} to match {}'.format(
                        f_exp_path, line_no, f_in_path))

            fields = line_in.rstrip('\n').split('\t')
            if len(fields) != 4:
                raise ValueError(
                    '{}, line {}: expected 4 tab-separated fields, '
                    'got {}'.format(f_in_path, line_no, len(fields)))
            left, right = fields[2], fields[3]
            middle = line_exp.rstrip('\n')

            text = left + ' ' + middle + ' ' + right
            # Unescape the line breaks and separate records by a blank line.
            f_whole.write(text.replace('\\n', '\n') + '\n\n')

        if f_exp.readline():
            # Leftover expected words would mean the files are misaligned.
            raise ValueError(
                '{} has more lines than {}'.format(f_exp_path, f_in_path))
# Build the raw-text corpora. Each entry is
# (context file, expected-word file, output corpus file).
# NOTE(review): the test corpus is generated from the dev-0 split, same as the
# validation corpus -- presumably intentional; confirm.
for corpus_paths in (
    ('train/in.tsv', 'train/expected.tsv', 'train/wiki.train.raw'),
    ('dev-0/in.tsv', 'dev-0/expected.tsv', 'dev-0/wiki.valid.raw'),
    ('dev-0/in.tsv', 'dev-0/expected.tsv', 'dev-0/wiki.test.raw'),
):
    process(*corpus_paths)