back to normal

This commit is contained in:
Michał Kozłowski 2023-01-24 18:23:25 +01:00
parent fcc09e70d6
commit 1c22eaabf9
2 changed files with 3 additions and 3 deletions

View File

@@ -34,7 +34,7 @@ def main(config, hug_token):
added_tokens = []
dataset = load_dataset(config.dataset_path, split='train', streaming=True)
dataset = load_dataset(config.dataset_path, split='train')
validation_dataset = dataset.take(100)
train_dataset = dataset.skip(10000)

View File

@@ -47,8 +47,8 @@ class DonutDataset(Dataset):
self.sort_json_key = sort_json_key
self.added_tokens = added_tokens
self.dataset = dataset.with_format("torch")
# self.dataset_length = len(self.dataset.with_format("torch"))
self.dataset = dataset
self.dataset_length = len(self.dataset)
self.gt_token_sequences = []
for sample in self.dataset: