random choice for sequences in streaming dataset

This commit is contained in:
michal.kozlowski 2023-03-17 14:38:19 +01:00
parent 30369a7885
commit dd39efb763

View File

@@ -68,7 +68,7 @@ class DonutDatasetStream:
pixel_values = pixel_values.squeeze()
# targets
-target_sequence = self.gt_token_sequences # can be more than one, e.g., DocVQA Task 1
+target_sequence = random.choice(self.gt_token_sequences) # can be more than one, e.g., DocVQA Task 1
input_ids = self.processor.tokenizer(
target_sequence,
add_special_tokens=False,