From dd39efb76343cbfc4f110ddb0d7aa8ccede82355 Mon Sep 17 00:00:00 2001 From: "michal.kozlowski" Date: Fri, 17 Mar 2023 14:38:19 +0100 Subject: [PATCH] random choice for sequences in streaming dataset --- utils/donut_dataset_stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/donut_dataset_stream.py b/utils/donut_dataset_stream.py index 62f06ea..b69d0cd 100644 --- a/utils/donut_dataset_stream.py +++ b/utils/donut_dataset_stream.py @@ -68,7 +68,7 @@ class DonutDatasetStream: pixel_values = pixel_values.squeeze() # targets - target_sequence = self.gt_token_sequences # can be more than one, e.g., DocVQA Task 1 + target_sequence = random.choice(self.gt_token_sequences) # can be more than one, e.g., DocVQA Task 1 input_ids = self.processor.tokenizer( target_sequence, add_special_tokens=False,