This commit is contained in:
Michał Kozłowski 2023-01-24 18:16:44 +01:00
parent 9ba7e24ebd
commit c7b2f1d20a

View File

@@ -35,8 +35,8 @@ def main(config, hug_token):
added_tokens = [] added_tokens = []
dataset = load_dataset(config.dataset_path, split='train', streaming=True) dataset = load_dataset(config.dataset_path, split='train', streaming=True)
train_dataset = dataset.skip(100)
validation_dataset = dataset.take(100) validation_dataset = dataset.take(100)
train_dataset = dataset.skip(10000)
train_dataset = DonutDataset( train_dataset = DonutDataset(
train_dataset, train_dataset,