diff --git a/utils/donut_dataset_stream.py b/utils/donut_dataset_stream.py index f295c8d..e10a0fa 100644 --- a/utils/donut_dataset_stream.py +++ b/utils/donut_dataset_stream.py @@ -48,6 +48,7 @@ class DonutDataset(Dataset): self.added_tokens = added_tokens self.dataset = load_dataset(dataset_name_or_path, split=self.split, streaming=True).with_format("torch") + print(self.dataset) self.dataset_length = len(self.dataset) self.gt_token_sequences = []