"""Convert fractional-year stamps in TSV datasets into calendar date fields."""

import calendar
import datetime


def to_fractional_year(d: datetime.datetime) -> float:
    """Convert a date stamp to a fractional year (i.e. a number like `1939.781`).

    The fraction is the elapsed share of the year: whole days since
    January 1st plus the time of day, divided by the length of that year
    (366 days in leap years, 365 otherwise). Sub-second precision is dropped
    because only hours, minutes and seconds are read from the time tuple.
    """
    days_in_year = 366 if calendar.isleap(d.year) else 365
    t = d.timetuple()
    # Time of day expressed as a fraction of a full day.
    day_time = (60 * 60 * t.tm_hour + 60 * t.tm_min + t.tm_sec) / (24 * 60 * 60)
    # tm_yday is 1-based, so subtract one to count days already elapsed.
    return d.year + ((t.tm_yday - 1 + day_time) / days_in_year)


def fractional_to_date(fractional: float) -> datetime.datetime:
    """Convert a fractional year back to the calendar day it falls on.

    The result is always midnight of that day (the time of day is discarded)
    and always lies in the year ``int(fractional)``.
    """
    # Nudge the product upward before truncating so that float round-off just
    # below a whole number of days does not land on the previous day.
    eps = 0.0001
    year = int(fractional)
    days_in_year = 366 if calendar.isleap(year) else 365
    day_of_year = int(days_in_year * (fractional % 1) + eps)
    # The nudge must never carry the last moments of December 31st over into
    # January 1st of the following year.
    day_of_year = min(day_of_year, days_in_year - 1)
    return datetime.datetime(year, 1, 1) + datetime.timedelta(days=day_of_year)


# Sample dates (month/year boundaries and a leap day) for the round-trip check.
dates = (
    datetime.datetime(1825, 10, 30),
    datetime.datetime(1825, 10, 31),
    datetime.datetime(1900, 1, 1),
    datetime.datetime(1900, 12, 1),
    datetime.datetime(1900, 12, 31),
    datetime.datetime(1930, 2, 28),
    datetime.datetime(1932, 2, 29),
)


def _check_round_trip() -> None:
    """Raise AssertionError unless every sample date survives a round trip."""
    for sample in dates:
        inverted = fractional_to_date(to_fractional_year(sample))
        if sample != inverted:
            raise AssertionError(f'{sample!r} round-tripped to {inverted!r}')


def convert_to_year_text_format(line: str) -> str:
    """Rewrite one tab-separated input line with explicit date fields.

    The line must have exactly eight tab-separated fields (otherwise the
    unpacking raises ValueError). The fourth field is the fractional year and
    the last two are text fields; the remaining fields are ignored. Any
    trailing newline on ``line`` stays attached to the last text field and is
    therefore carried into the result.
    """
    _, _, _, fractional_year, _, _, text_l, text_r = line.split('\t')
    date = fractional_to_date(float(fractional_year))
    year = date.year
    month = date.month
    day = date.day
    weekday = date.weekday()  # Monday is 0
    return f'year: {year} month: {month} day: {day} weekday: {weekday} text: \t' + text_l + '\t' + text_r


def convert_dataset(f_in_path, f_out_path):
    """Convert every line of the file at ``f_in_path`` into ``f_out_path``.

    Both files are opened as UTF-8 so the result does not depend on the
    platform's default locale encoding.
    NOTE(review): assumes the datasets are UTF-8 encoded -- confirm.
    """
    with open(f_in_path, 'r', encoding='utf-8') as f_in, \
            open(f_out_path, 'w', encoding='utf-8') as f_out:
        for line in f_in:
            f_out.write(convert_to_year_text_format(line))


def main() -> None:
    """Run the round-trip check, then convert the dev and test datasets."""
    _check_round_trip()
    convert_dataset('../dev-0/in.tsv', './dev-0-date.tsv')
    convert_dataset('../test-A/in.tsv', './test-A-date.tsv')


# Only touch the filesystem when executed as a script, so the helpers above
# can be imported without side effects.
if __name__ == '__main__':
    main()