change word limit from 400 to 450

This commit is contained in:
Jakub Pokrywka 2021-11-06 15:44:43 +01:00
parent 00fec94240
commit ab804f0c9e
2 changed files with 4 additions and 4 deletions

View File

@@ -31,7 +31,7 @@ for dataset in 'train', 'dev-0':
text = text.replace('\\n','') text = text.replace('\\n','')
text_splitted = text.split(' ') text_splitted = text.split(' ')
for i in range(0, len(text_splitted), 400): for i in range(0, len(text_splitted), 450):
text_chunk = ' '.join(text_splitted[i:i+400]) text_chunk = ' '.join(text_splitted[i:i+450])
f_hf.write(text_chunk +'\n') f_hf.write(text_chunk +'\n')

View File

@@ -31,8 +31,8 @@ for dataset in 'train', 'dev-0':
text = text.replace('\\n','') text = text.replace('\\n','')
text_splitted = text.split(' ') text_splitted = text.split(' ')
for i in range(0, len(text_splitted), 400): for i in range(0, len(text_splitted), 450):
text_chunk = ' '.join(text_splitted[i:i+400]) text_chunk = ' '.join(text_splitted[i:i+450])
text_chunk = 'year : ' + year + ' month : ' + month + ' day ' + day_of_month + ' weekday : ' + weekday + ' ' + text_chunk text_chunk = 'year : ' + year + ' month : ' + month + ' day ' + day_of_month + ' weekday : ' + weekday + ' ' + text_chunk
f_hf.write(text_chunk +'\n') f_hf.write(text_chunk +'\n')