Pick a random ground-truth token sequence per sample in the streaming dataset
This commit is contained in:
parent
30369a7885
commit
dd39efb763
@@ -68,7 +68,7 @@ class DonutDatasetStream:
|
|||||||
pixel_values = pixel_values.squeeze()
|
pixel_values = pixel_values.squeeze()
|
||||||
|
|
||||||
# targets
|
# targets
|
||||||
target_sequence = self.gt_token_sequences # can be more than one, e.g., DocVQA Task 1
|
target_sequence = random.choice(self.gt_token_sequences) # can be more than one, e.g., DocVQA Task 1
|
||||||
input_ids = self.processor.tokenizer(
|
input_ids = self.processor.tokenizer(
|
||||||
target_sequence,
|
target_sequence,
|
||||||
add_special_tokens=False,
|
add_special_tokens=False,
|
||||||
|
Loading…
Reference in New Issue
Block a user