This commit is contained in:
mkozlowskiAzimuthe 2023-01-25 18:06:04 +01:00
parent ce564bd1c7
commit 93a231a477

View File

@@ -48,6 +48,7 @@ class DonutDataset(Dataset):
self.added_tokens = added_tokens self.added_tokens = added_tokens
self.dataset = load_dataset(dataset_name_or_path, split=self.split, streaming=True).with_format("torch") self.dataset = load_dataset(dataset_name_or_path, split=self.split, streaming=True).with_format("torch")
print(self.dataset)
self.dataset_length = len(self.dataset) self.dataset_length = len(self.dataset)
self.gt_token_sequences = [] self.gt_token_sequences = []