back to normal
This commit is contained in:
parent
fcc09e70d6
commit
1c22eaabf9
2
train.py
2
train.py
@@ -34,7 +34,7 @@ def main(config, hug_token):
|
||||
|
||||
added_tokens = []
|
||||
|
||||
-dataset = load_dataset(config.dataset_path, split='train', streaming=True)
|
||||
+dataset = load_dataset(config.dataset_path, split='train')
|
||||
validation_dataset = dataset.take(100)
|
||||
train_dataset = dataset.skip(10000)
|
||||
|
||||
|
@@ -47,8 +47,8 @@ class DonutDataset(Dataset):
|
||||
self.sort_json_key = sort_json_key
|
||||
self.added_tokens = added_tokens
|
||||
|
||||
-self.dataset = dataset.with_format("torch")
|
||||
-# self.dataset_length = len(self.dataset.with_format("torch"))
|
||||
+self.dataset = dataset
|
||||
+self.dataset_length = len(self.dataset)
|
||||
|
||||
self.gt_token_sequences = []
|
||||
for sample in self.dataset:
|
||||
|
Loading…
Reference in New Issue
Block a user