challenging-america-year-pr.../hf_roberta_base/01_create_datasets.py
2021-12-13 12:56:26 +01:00

14 lines
605 B
Python

import datetime
# Convert the labelled splits into the two-column, tab-separated file used for
# Hugging Face training: a "year<TAB>text" header followed by one row per
# example. The year comes from expected.tsv; the text is the second
# tab-separated field of the matching in.tsv line.
for split in 'train', 'dev-0':
    # Explicit UTF-8 keeps the conversion independent of the machine's locale
    # (assumes the challenge TSV files are UTF-8 -- TODO confirm).
    with open(f'../{split}/in.tsv', encoding='utf-8') as f_in, \
            open(f'../{split}/expected.tsv', encoding='utf-8') as f_exp, \
            open(f'./{split}_huggingface_format.csv', 'w', encoding='utf-8') as f_hf:
        f_hf.write('year\ttext\n')
        for line_no, line_in in enumerate(f_in, start=1):
            # Read the label explicitly instead of zip()-ing the two files:
            # zip() silently stops at the shorter one, which would hide a
            # misalignment between texts and labels.
            line_exp = f_exp.readline()
            if not line_exp:
                raise ValueError(
                    f'../{split}/expected.tsv ends before ../{split}/in.tsv '
                    f'(no label for input line {line_no})'
                )
            # Drop the line terminator before splitting and add it back
            # explicitly, so every row ends with exactly one newline even if
            # the text is not the last field or the final line lacks one.
            text = line_in.rstrip('\n').split('\t')[1]
            f_hf.write(f'{line_exp.rstrip()}\t{text}\n')
        if f_exp.readline():
            raise ValueError(
                f'../{split}/expected.tsv has more lines than ../{split}/in.tsv'
            )
# Convert the test split into the same "year<TAB>text" layout. No expected.tsv
# is read for test-A, so every row gets the placeholder label 0.0 in the year
# column.
for split in ('test-A',):
    # Explicit UTF-8 keeps the conversion independent of the machine's locale
    # (assumes the challenge TSV files are UTF-8 -- TODO confirm).
    with open(f'../{split}/in.tsv', encoding='utf-8') as f_in, \
            open(f'./{split}_huggingface_format.csv', 'w', encoding='utf-8') as f_hf:
        f_hf.write('year\ttext\n')
        for line_in in f_in:
            # Drop the line terminator before splitting and add it back
            # explicitly, so every row ends with exactly one newline even if
            # the text is not the last field or the final line lacks one.
            text = line_in.rstrip('\n').split('\t')[1]
            f_hf.write(f'0.0\t{text}\n')