ireland-news-headlines/roberta_temp/01a_create_guess_date_datasets.py

18 lines
1012 B
Python
Raw Permalink Normal View History

2021-09-24 15:29:02 +02:00
import datetime
# For each data split, write ../<split>/huggingface_guess_day.csv: one
# tab-separated row per input line, holding the text, the original
# year_cont and date fields, calendar features derived from the date,
# and a label column (currently the weekday).
#
# Each line of in.tsv must hold exactly three tab-separated fields
# (year_cont, date, text) with the date formatted as YYYYMMDD; otherwise
# the tuple unpacking or strptime raises ValueError.
for dataset in 'train', 'dev-0':
    # Encoding is given explicitly so the output does not depend on the
    # locale's default encoding of the machine running the script.
    with open(f'../{dataset}/in.tsv', encoding='utf-8') as f_in, \
            open(f'../{dataset}/expected.tsv', encoding='utf-8') as f_exp, \
            open(f'../{dataset}/huggingface_guess_day.csv', 'w', encoding='utf-8') as f_hf:
        f_hf.write('text\tyear_cont\tdate\tday_of_year\tday_of_month\tmonth\tyear\tweekday\tlabel\n')

        # The content of expected.tsv is not written anywhere; it is zipped
        # in only so that iteration stops at the shorter of the two files.
        for line_in, _ in zip(f_in, f_exp):
            year_cont, date, text = line_in.rstrip('\n').split('\t')
            d = datetime.datetime.strptime(date, '%Y%m%d')

            day_of_year = str(d.timetuple().tm_yday)  # 1-based day within the year
            day_of_month = str(d.day)
            month = str(d.month)
            year = str(d.year)
            weekday = str(d.weekday())  # Monday is 0, Sunday is 6

            # The prediction target is the weekday, so it appears twice in
            # the row: once as a feature column and once as the label.
            label = weekday

            row = (text, year_cont, date, day_of_year, day_of_month,
                   month, year, weekday, label)
            f_hf.write('\t'.join(row) + '\n')