streaming
This commit is contained in:
parent
679048f88a
commit
cb3c3e46cb
@ -47,7 +47,7 @@ class DonutDataset(Dataset):
|
|||||||
self.sort_json_key = sort_json_key
|
self.sort_json_key = sort_json_key
|
||||||
self.added_tokens = added_tokens
|
self.added_tokens = added_tokens
|
||||||
|
|
||||||
self.dataset = load_dataset(dataset_name_or_path, split=self.split, stream=True).with_format("torch")
|
self.dataset = load_dataset(dataset_name_or_path, split=self.split, streaming=True).with_format("torch")
|
||||||
self.dataset_length = len(self.dataset)
|
self.dataset_length = len(self.dataset)
|
||||||
|
|
||||||
self.gt_token_sequences = []
|
self.gt_token_sequences = []
|
||||||
|
Loading…
Reference in New Issue
Block a user