"""Convert dataset splits into ``*_huggingface_format.csv`` files.

For every split the script reads ``../<split>/in.tsv`` (one text per line)
and writes ``./<split>_huggingface_format.csv`` in the current directory.
Despite the ``.csv`` extension the output is tab-separated, with the columns
named in ``HEADER``.

* Labelled splits (``train``, ``dev-0``) also read ``../<split>/expected.tsv``,
  whose lines have the form ``<year_start>,<year_end>``.
* Unlabelled splits (``test-A``) get constant placeholder label columns.

Run as a script; all paths are relative to the current working directory.
"""
from itertools import zip_longest
from typing import Iterable, Iterator

HEADER = 'year_start_float\tyear_end_float\tyear_middle_float\tyear_middle_int\ttext\n'
LABELLED_SPLITS = ('train', 'dev-0')
UNLABELLED_SPLITS = ('test-A',)
# Label columns written for splits that have no expected.tsv.
PLACEHOLDER_LABELS = '0.0\t0.0\t0.0\t0'

# Sentinel used to detect that one of two line streams ended before the other.
_MISSING = object()


def _terminated(line: str) -> str:
    """Return *line* unchanged if it ends with a newline, else with one appended."""
    # The last line of a file may lack a trailing newline; without this the
    # final output row would be left unterminated.
    return line if line.endswith('\n') else line + '\n'


def labelled_rows(lines_in: Iterable[str], lines_exp: Iterable[str]) -> Iterator[str]:
    """Yield one tab-separated output row per (text line, label line) pair.

    Args:
        lines_in: Text lines; each is copied verbatim into the last column.
        lines_exp: Label lines of the form ``<year_start>,<year_end>``.

    Yields:
        ``start<TAB>end<TAB>middle<TAB>middle_int<TAB>text`` rows, each ending
        with a newline. ``start`` and ``end`` are emitted exactly as written
        in the label line; ``middle`` is their arithmetic mean as a float and
        ``middle_int`` is ``round(middle)`` (built-in ``round`` rounds halves
        to the nearest even integer).

    Raises:
        ValueError: If the two inputs differ in length, or a label line is
            not two comma-separated numbers.
    """
    pairs = zip_longest(lines_in, lines_exp, fillvalue=_MISSING)
    for line_no, (line_in, line_exp) in enumerate(pairs, start=1):
        # Plain zip() would silently drop the surplus lines and hide
        # misaligned text/label files.
        if line_in is _MISSING or line_exp is _MISSING:
            raise ValueError(
                f'input and expected files differ in length '
                f'(first unmatched line: {line_no})'
            )
        try:
            year_start, year_end = line_exp.rstrip().split(',')
            year_middle = (float(year_start) + float(year_end)) / 2
            year_middle_int = round(year_middle)
        except ValueError as err:
            raise ValueError(
                f'line {line_no}: cannot parse year range {line_exp!r}'
            ) from err
        yield (
            f'{year_start}\t{year_end}\t{year_middle}\t{year_middle_int}'
            f'\t{_terminated(line_in)}'
        )


def unlabelled_rows(lines_in: Iterable[str]) -> Iterator[str]:
    """Yield one output row per text line, using placeholder label columns."""
    for line_in in lines_in:
        yield PLACEHOLDER_LABELS + '\t' + _terminated(line_in)


def main() -> None:
    """Convert every labelled and unlabelled split, overwriting existing output."""
    # The encoding is explicit so results do not depend on the platform default.
    for split in LABELLED_SPLITS:
        with open(f'../{split}/in.tsv', encoding='utf-8') as f_in, \
                open(f'../{split}/expected.tsv', encoding='utf-8') as f_exp, \
                open(f'./{split}_huggingface_format.csv', 'w', encoding='utf-8') as f_hf:
            f_hf.write(HEADER)
            f_hf.writelines(labelled_rows(f_in, f_exp))

    for split in UNLABELLED_SPLITS:
        with open(f'../{split}/in.tsv', encoding='utf-8') as f_in, \
                open(f'./{split}_huggingface_format.csv', 'w', encoding='utf-8') as f_hf:
            f_hf.write(HEADER)
            f_hf.writelines(unlabelled_rows(f_in))


if __name__ == '__main__':
    main()