From ab804f0c9ebc33b0bf988a0ff495b29f550616be Mon Sep 17 00:00:00 2001 From: Jakub Pokrywka Date: Sat, 6 Nov 2021 15:44:43 +0100 Subject: [PATCH] change word limit from 400 to 450 --- regular_roberta_from_scratch/00_create_datasets.py | 4 ++-- roberta_with_year_from_scratch/00_create_datasets.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/regular_roberta_from_scratch/00_create_datasets.py b/regular_roberta_from_scratch/00_create_datasets.py index 026cf0b..80a0f97 100644 --- a/regular_roberta_from_scratch/00_create_datasets.py +++ b/regular_roberta_from_scratch/00_create_datasets.py @@ -31,7 +31,7 @@ for dataset in 'train', 'dev-0': text = text.replace('\\n','') text_splitted = text.split(' ') - for i in range(0, len(text_splitted), 400): - text_chunk = ' '.join(text_splitted[i:i+400]) + for i in range(0, len(text_splitted), 450): + text_chunk = ' '.join(text_splitted[i:i+450]) f_hf.write(text_chunk +'\n') diff --git a/roberta_with_year_from_scratch/00_create_datasets.py b/roberta_with_year_from_scratch/00_create_datasets.py index 09dc41b..03171f9 100644 --- a/roberta_with_year_from_scratch/00_create_datasets.py +++ b/roberta_with_year_from_scratch/00_create_datasets.py @@ -31,8 +31,8 @@ for dataset in 'train', 'dev-0': text = text.replace('\\n','') text_splitted = text.split(' ') - for i in range(0, len(text_splitted), 400): - text_chunk = ' '.join(text_splitted[i:i+400]) + for i in range(0, len(text_splitted), 450): + text_chunk = ' '.join(text_splitted[i:i+450]) text_chunk = 'year : ' + year + ' month : ' + month + ' day ' + day_of_month + ' weekday : ' + weekday + ' ' + text_chunk f_hf.write(text_chunk +'\n')