import datetime
from tqdm import tqdm

#with open('../test-A/in.tsv','r') as f_in, open(f'./test-A_in.csv', 'w') as f_hf:
#    f_hf.write('text\n')
#    for line_in in f_in:
#        year_cont, date, text = line_in.rstrip('\n').split('\t')
#        d = datetime.datetime.strptime(date,"%Y%m%d")
#        day_of_year = str(d.timetuple().tm_yday)
#        day_of_month = str(d.day)
#        month = str(d.month)
#        year = str(d.year)
#        weekday = str(d.weekday())
#        day_of_year = str(d.timetuple().tm_yday)
#        f_hf.write(text +'\n')


# Maximum number of space-separated tokens written per output row.
WORDS_PER_CHUNK = 400


def _word_chunks(text, size=WORDS_PER_CHUNK):
    """Yield consecutive pieces of *text* holding at most *size* tokens each.

    Tokens come from ``text.split(' ')`` (a single-space split), so runs of
    spaces produce empty tokens that are preserved when a piece is re-joined.
    An empty *text* yields exactly one empty piece.
    """
    words = text.split(' ')
    for start in range(0, len(words), size):
        yield ' '.join(words[start:start + size])


# For every dataset, read ../<dataset>/in.tsv and write ./<dataset>_in.csv:
# a 'text' header line followed by the last input column, split into rows of
# at most WORDS_PER_CHUNK tokens.
# NOTE(review): rows are written verbatim, without CSV quoting/escaping; text
# containing commas or double quotes may be mis-read by a strict CSV parser --
# confirm what the consumer of these files expects.
# NOTE(review): both files use the platform default text encoding -- confirm
# whether an explicit encoding should be passed to open().
for dataset in 'train', 'dev-0':
    with open(f'../{dataset}/in.tsv') as f_in, open(f'./{dataset}_in.csv', 'w') as f_hf:
        f_hf.write('text\n')
        for line_in in tqdm(f_in):
            # Exactly eight tab-separated columns are required (the unpacking
            # raises ValueError otherwise). Only the last one is written out;
            # the date column is not parsed because nothing derived from it
            # is ever emitted.
            _, _, _date, _year_frac, _, _, _, text = line_in.rstrip('\n').split('\t')

            # Remove literal backslash-n sequences (two characters), keeping
            # each record on a single physical line.
            text = text.replace('\\n', '')

            f_hf.writelines(chunk + '\n' for chunk in _word_chunks(text))