65 lines
1.8 KiB
Python
65 lines
1.8 KiB
Python
import calendar
|
|
import datetime
|
|
|
|
def to_fractional_year(d: datetime.datetime) -> float:
    """Express a timestamp as a fractional year (a number like ``1939.781``).

    The integer part is ``d.year``. The fractional part is the share of
    that year already elapsed at ``d``: whole days since 1 January plus the
    time of day (hours, minutes and seconds only), divided by the length of
    the year in days (366 for leap years, 365 otherwise).
    """
    year_length = 366 if calendar.isleap(d.year) else 365

    parts = d.timetuple()
    seconds_into_day = 3600 * parts.tm_hour + 60 * parts.tm_min + parts.tm_sec
    # tm_yday is 1-based, so 1 January contributes zero elapsed days.
    elapsed_days = (parts.tm_yday - 1) + seconds_into_day / 86400

    return d.year + elapsed_days / year_length
|
|
|
|
def fractional_to_date(fractional: float) -> datetime.datetime:
    """Convert a fractional year (e.g. ``1939.781``) to the day it falls on.

    The integer part of ``fractional`` selects the year; the fractional
    part is scaled by the length of that year (365 or 366 days) and
    truncated to a whole number of days after 1 January. Any time of day
    encoded in the fraction is discarded, so the result is always midnight.

    Args:
        fractional: Year with the elapsed share of the year as its
            fractional part.

    Returns:
        A ``datetime.datetime`` at 00:00 on the matching day, always within
        the year ``int(fractional)``.
    """
    # Tolerance, in days, that absorbs floating-point rounding so a value
    # produced for midnight is not truncated to the previous day.
    eps = 0.0001

    year = int(fractional)
    days_in_year = 366 if calendar.isleap(year) else 365

    day_offset = int(days_in_year * (fractional % 1) + eps)
    # The tolerance can lift a value from the last seconds of the year up to
    # ``days_in_year``, which would roll over to 1 January of the following
    # year; clamp to the last valid day so the result stays inside ``year``.
    day_offset = min(day_offset, days_in_year - 1)

    return datetime.datetime(year, 1, 1) + datetime.timedelta(days=day_offset)
|
|
|
|
# Round-trip self-check executed at import time: each sample midnight date
# must survive conversion to a fractional year and back unchanged. The
# samples include month and year boundaries and a leap day.
dates = tuple(
    datetime.datetime(*ymd)
    for ymd in (
        (1825, 10, 30),
        (1825, 10, 31),
        (1900, 1, 1),
        (1900, 12, 1),
        (1900, 12, 31),
        (1930, 2, 28),
        (1932, 2, 29),
    )
)

for d in dates:
    inverted = fractional_to_date(to_fractional_year(d))
    assert d == inverted
|
|
|
|
|
|
|
|
def convert_to_year_text_format(line):
    """Turn one tab-separated input row into a date-annotated text row.

    ``line`` must split on tabs into exactly eight fields (unpacking raises
    ``ValueError`` otherwise). The fourth field is parsed as a fractional
    year and converted with ``fractional_to_date``; the last two fields are
    the text columns and are passed through untouched, including whatever
    line terminator the final field carries.

    Returns the string ``year: Y month: M day: D weekday: W text: `` followed
    by a tab, the first text column, a tab and the second text column, where
    ``W`` is ``datetime.weekday()`` of the converted date.
    """
    fields = line.split('\t')
    _, _, _, stamp, _, _, left_text, right_text = fields

    when = fractional_to_date(float(stamp))

    return (
        f'year: {when.year} month: {when.month} day: {when.day} '
        f'weekday: {when.weekday()} text: \t{left_text}\t{right_text}'
    )
|
|
|
|
|
|
def convert_dataset(f_in_path, f_out_path):
    """Convert every line of ``f_in_path`` and write it to ``f_out_path``.

    Lines are read one at a time, passed through
    ``convert_to_year_text_format`` and written out exactly as returned; no
    line terminator is added here. The output file is created or truncated.
    """
    with open(f_in_path, 'r') as source, open(f_out_path, 'w') as target:
        # map() is lazy, so rows are still converted and written one by one.
        target.writelines(map(convert_to_year_text_format, source))
|
|
|
|
# Script entry: convert the dev and test splits. Paths are relative to the
# current working directory, and both conversions run on import/execution.
convert_dataset(f_in_path='../dev-0/in.tsv', f_out_path='./dev-0-date.tsv')
convert_dataset(f_in_path='../test-A/in.tsv', f_out_path='./test-A-date.tsv')
|