train stream fix

This commit is contained in:
michal.kozlowski 2023-03-14 21:13:02 +01:00
parent c474b560aa
commit 7fadabea91

View File

@@ -79,7 +79,7 @@ def main(config, hug_token):
return obj return obj
def process(row, split): def process(row, split):
task_start_token, prompt_end_token = "<s_cord-v2>" task_start_token, prompt_end_token = "<s_cord-v2>", "<s_cord-v2>"
ground_truth = json.loads(row["ground_truth"]) ground_truth = json.loads(row["ground_truth"])
if "gt_parses" in ground_truth: # when multiple ground truths are available, e.g., docvqa if "gt_parses" in ground_truth: # when multiple ground truths are available, e.g., docvqa
assert isinstance(ground_truth["gt_parses"], list) assert isinstance(ground_truth["gt_parses"], list)