From d98383197fe73e191fca2dbee552b4953ee492e4 Mon Sep 17 00:00:00 2001 From: s444415 Date: Fri, 16 Dec 2022 12:32:08 +0000 Subject: [PATCH 01/17] modifcation to train and eval --- donut-eval.py | 34 +++++++++++++++++++--------------- donut-train.py | 8 ++++---- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/donut-eval.py b/donut-eval.py index c5548e6..cbcc0b7 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -11,21 +11,21 @@ import json import torch from tqdm.auto import tqdm import numpy as np - +import pandas as pd from donut import JSONParseEvaluator # In[2]: -processor = DonutProcessor.from_pretrained("Zombely/plwiki-proto-fine-tuned") -model = VisionEncoderDecoderModel.from_pretrained("Zombely/plwiki-proto-fine-tuned") +processor = DonutProcessor.from_pretrained("Zombely/plwiki-proto-fine-tuned-v2") +model = VisionEncoderDecoderModel.from_pretrained("Zombely/plwiki-proto-fine-tuned-v2") # In[3]: -dataset = load_dataset("Zombely/pl-text-images-5000-whole", split="validation") +dataset = load_dataset("Zombely/diachronia-ocr", split='train') # In[4]: @@ -38,11 +38,11 @@ model.to(device) output_list = [] accs = [] - +has_metadata = bool(dataset[0].get('ground_truth')) for idx, sample in tqdm(enumerate(dataset), total=len(dataset)): # prepare encoder inputs - pixel_values = processor(sample["image"].convert("RGB"), return_tensors="pt").pixel_values + pixel_values = processor(sample['image'].convert("RGB"), return_tensors="pt").pixel_values pixel_values = pixel_values.to(device) # prepare decoder inputs task_prompt = "" @@ -68,16 +68,20 @@ for idx, sample in tqdm(enumerate(dataset), total=len(dataset)): seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "") seq = re.sub(r"<.*?>", "", seq, count=1).strip() # remove first task start token seq = processor.token2json(seq) + if has_metadata: + ground_truth = json.loads(sample["ground_truth"]) + ground_truth = ground_truth["gt_parse"] + evaluator = JSONParseEvaluator() + score = evaluator.cal_acc(seq, ground_truth) - ground_truth = json.loads(sample["ground_truth"]) - ground_truth = ground_truth["gt_parse"] - evaluator = JSONParseEvaluator() - score = evaluator.cal_acc(seq, ground_truth) - - accs.append(score) + accs.append(score) + print(seq) output_list.append(seq) +df = pd.DataFrame(map(lambda x: x.get('text_sequence', ''), output_list)) +df.to_csv('out.tsv', sep='\t', header=False, index=False) -scores = {"accuracies": accs, "mean_accuracy": np.mean(accs)} -print(scores, f"length : {len(accs)}") -print("Mean accuracy:", np.mean(accs)) +if has_metadata: + scores = {"accuracies": accs, "mean_accuracy": np.mean(accs)} + print(scores, f"length : {len(accs)}") + print("Mean accuracy:", np.mean(accs)) diff --git a/donut-train.py b/donut-train.py index 8eaa167..7ba98f0 100644 --- a/donut-train.py +++ b/donut-train.py @@ -22,7 +22,7 @@ from pytorch_lightning.plugins import CheckpointIO DATASET_PATH = "Zombely/pl-text-images-5000-whole" -PRETRAINED_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned" +PRETRAINED_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned-v2" START_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned" OUTPUT_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned-v2" LOGGING_PATH = "plwiki-proto-ft-second-iter" @@ -30,8 +30,8 @@ CHECKPOINT_PATH = "./checkpoint" train_config = { - "max_epochs":30, - "val_check_interval":0.5, # how many times we want to validate during an epoch + "max_epochs":1, + "val_check_interval":1.0, # how many times we want to validate during an epoch "check_val_every_n_epoch":1, "gradient_clip_val":1.0, "num_training_samples_per_epoch": 800, @@ -339,7 +339,7 @@ class PushToHubCallback(Callback): -login(os.environ.get("HUG_TOKKEN", "")) +login(os.environ.get("HUG_TOKKEN", None), True) # ### Wandb.ai link: https://wandb.ai/michalkozlowski936/Donut?workspace=user-michalkozlowski936 From 8ccd1aabb64fd5d49533cff810ca9cc36baed740 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Fri, 16 Dec 2022 13:53:03 +0100 Subject: [PATCH 02/17] config and params for donut-eval --- config.yaml | 7 +++ donut-eval.py | 129 ++++++++++++++++++++++++-------------------------- 2 files changed, 68 insertions(+), 68 deletions(-) create mode 100644 config.yaml diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..cb7e730 --- /dev/null +++ b/config.yaml @@ -0,0 +1,7 @@ +pretrained_processor_path: "Zombely/plwiki-proto-fine-tuned-v2" +pretrained_model_path: "Zombely/plwiki-proto-fine-tuned-v2" +validation_dataset_path: "Zombely/diachronia-ocr" +validation_dataset_split: "train" +has_metadata: False +print_output: True +output_file_dir: "../../gonito-outs" \ No newline at end of file diff --git a/donut-eval.py b/donut-eval.py index cbcc0b7..23efe11 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -1,9 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -# In[1]: - - from transformers import DonutProcessor, VisionEncoderDecoderModel from datasets import load_dataset import re @@ -13,75 +10,71 @@ from tqdm.auto import tqdm import numpy as np import pandas as pd from donut import JSONParseEvaluator +import argparse +from sconf import Config +def main(config): + processor = DonutProcessor.from_pretrained(config.pretrained_processor_path) + model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path) + dataset = load_dataset(config.validation_dataset_path, split=config.validation_dataset_split) + device = "cuda" if torch.cuda.is_available() else "cpu" + model.eval() + model.to(device) + output_list = [] + accs = [] -# In[2]: + for idx, sample in tqdm(enumerate(dataset), total=len(dataset)): + # prepare encoder inputs + pixel_values = processor(sample['image'].convert("RGB"), return_tensors="pt").pixel_values + pixel_values = pixel_values.to(device) + # prepare decoder inputs + task_prompt = "" + decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids + decoder_input_ids = decoder_input_ids.to(device) + + # autoregressively generate sequence + outputs = model.generate( + pixel_values, + decoder_input_ids=decoder_input_ids, + max_length=model.decoder.config.max_position_embeddings, + early_stopping=True, + pad_token_id=processor.tokenizer.pad_token_id, + eos_token_id=processor.tokenizer.eos_token_id, + use_cache=True, + num_beams=1, + bad_words_ids=[[processor.tokenizer.unk_token_id]], + return_dict_in_generate=True, + ) + # turn into JSON + seq = processor.batch_decode(outputs.sequences)[0] + seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "") + seq = re.sub(r"<.*?>", "", seq, count=1).strip() # remove first task start token + seq = processor.token2json(seq) + if config.has_metadata: + ground_truth = json.loads(sample["ground_truth"]) + ground_truth = ground_truth["gt_parse"] + evaluator = JSONParseEvaluator() + score = evaluator.cal_acc(seq, ground_truth) -processor = DonutProcessor.from_pretrained("Zombely/plwiki-proto-fine-tuned-v2") -model = VisionEncoderDecoderModel.from_pretrained("Zombely/plwiki-proto-fine-tuned-v2") + accs.append(score) + if config.print_output: + print(seq) + output_list.append(seq) + if config.output_file_dir: + df = pd.DataFrame(map(lambda x: x.get('text_sequence', ''), output_list)) + df.to_csv(f'{config.output_file_dir}/{config.pretrained_processor_path}-out.tsv', sep='\t', header=False, index=False) + if config.has_metadata: + scores = {"accuracies": accs, "mean_accuracy": np.mean(accs)} + print(scores, f"length : {len(accs)}") + print("Mean accuracy:", np.mean(accs)) -# In[3]: - - -dataset = load_dataset("Zombely/diachronia-ocr", split='train') - - -# In[4]: - - -device = "cuda" if torch.cuda.is_available() else "cpu" - -model.eval() -model.to(device) - -output_list = [] -accs = [] -has_metadata = bool(dataset[0].get('ground_truth')) - -for idx, sample in tqdm(enumerate(dataset), total=len(dataset)): - # prepare encoder inputs - pixel_values = processor(sample['image'].convert("RGB"), return_tensors="pt").pixel_values - pixel_values = pixel_values.to(device) - # prepare decoder inputs - task_prompt = "" - decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids - decoder_input_ids = decoder_input_ids.to(device) - - # autoregressively generate sequence - outputs = model.generate( - pixel_values, - decoder_input_ids=decoder_input_ids, - max_length=model.decoder.config.max_position_embeddings, - early_stopping=True, - pad_token_id=processor.tokenizer.pad_token_id, - eos_token_id=processor.tokenizer.eos_token_id, - use_cache=True, - num_beams=1, - bad_words_ids=[[processor.tokenizer.unk_token_id]], - return_dict_in_generate=True, - ) - - # turn into JSON - seq = processor.batch_decode(outputs.sequences)[0] - seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "") - seq = re.sub(r"<.*?>", "", seq, count=1).strip() # remove first task start token - seq = processor.token2json(seq) - if has_metadata: - ground_truth = json.loads(sample["ground_truth"]) - ground_truth = ground_truth["gt_parse"] - evaluator = JSONParseEvaluator() - score = evaluator.cal_acc(seq, ground_truth) - - accs.append(score) - print(seq) - output_list.append(seq) -df = pd.DataFrame(map(lambda x: x.get('text_sequence', ''), output_list)) -df.to_csv('out.tsv', sep='\t', header=False, index=False) - -if has_metadata: - scores = {"accuracies": accs, "mean_accuracy": np.mean(accs)} - print(scores, f"length : {len(accs)}") - print("Mean accuracy:", np.mean(accs)) +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--config", type=str, required=True) + args, left_argv = parser.parse_known_args() + config = Config(args.config) + config.argv_update(left_argv) + main(config) \ No newline at end of file From b7296bb2a94d003133d4ca853b12e7c963357740 Mon Sep 17 00:00:00 2001 From: s444415 Date: Fri, 16 Dec 2022 14:16:46 +0000 Subject: [PATCH 03/17] config changes --- config.yaml | 7 ++++--- donut-eval.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/config.yaml b/config.yaml index cb7e730..4ff337a 100644 --- a/config.yaml +++ b/config.yaml @@ -1,7 +1,8 @@ -pretrained_processor_path: "Zombely/plwiki-proto-fine-tuned-v2" -pretrained_model_path: "Zombely/plwiki-proto-fine-tuned-v2" +pretrained_processor_path: "naver-clova-ix/donut-proto" +pretrained_model_path: "naver-clova-ix/donut-proto" validation_dataset_path: "Zombely/diachronia-ocr" validation_dataset_split: "train" has_metadata: False print_output: True -output_file_dir: "../../gonito-outs" \ No newline at end of file +output_file_dir: "../../gonito-outs" +test_name: "proto-test" diff --git a/donut-eval.py b/donut-eval.py index 23efe11..5754483 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -63,7 +63,7 @@ def main(config): output_list.append(seq) if config.output_file_dir: df = pd.DataFrame(map(lambda x: x.get('text_sequence', ''), output_list)) - df.to_csv(f'{config.output_file_dir}/{config.pretrained_processor_path}-out.tsv', sep='\t', header=False, index=False) + df.to_csv(f'{config.output_file_dir}/{config.test_name}-out.tsv', sep='\t', header=False, index=False) if config.has_metadata: scores = {"accuracies": accs, "mean_accuracy": np.mean(accs)} @@ -77,4 +77,4 @@ if __name__ == "__main__": config = Config(args.config) config.argv_update(left_argv) - main(config) \ No newline at end of file + main(config) From de8f89ddb15ace41d14af6ac5606ea90ad4ad19b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Fri, 16 Dec 2022 15:20:10 +0100 Subject: [PATCH 04/17] config for model --- donut-eval.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/donut-eval.py b/donut-eval.py index 5754483..456659a 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # coding: utf-8 -from transformers import DonutProcessor, VisionEncoderDecoderModel +from transformers import DonutProcessor, VisionEncoderDecoderModel, VisionEncoderDecoderConfig from datasets import load_dataset import re import json @@ -14,8 +14,15 @@ import argparse from sconf import Config def main(config): + + max_length = 768 + image_size = [1920, 2560] + config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) + config_vision.encoder.image_size = image_size # (height, width) + config_vision.decoder.max_length = max_length + processor = DonutProcessor.from_pretrained(config.pretrained_processor_path) - model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path) + model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path, config=config) dataset = load_dataset(config.validation_dataset_path, split=config.validation_dataset_split) device = "cuda" if torch.cuda.is_available() else "cpu" model.eval() From ed4cabfe350184da1d759d865e3416bc24bbeb52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Fri, 16 Dec 2022 15:24:04 +0100 Subject: [PATCH 05/17] config update --- config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config.yaml b/config.yaml index 4ff337a..be75a38 100644 --- a/config.yaml +++ b/config.yaml @@ -1,8 +1,8 @@ -pretrained_processor_path: "naver-clova-ix/donut-proto" -pretrained_model_path: "naver-clova-ix/donut-proto" +pretrained_processor_path: "naver-clova-ix/donut-base" +pretrained_model_path: "naver-clova-ix/donut-base" validation_dataset_path: "Zombely/diachronia-ocr" validation_dataset_split: "train" has_metadata: False print_output: True output_file_dir: "../../gonito-outs" -test_name: "proto-test" +test_name: "base-test" From ff2ba1533ad83b2b3cb48aa86f9ec6bcad19cbfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Fri, 16 Dec 2022 15:24:37 +0100 Subject: [PATCH 06/17] fix --- donut-eval.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/donut-eval.py b/donut-eval.py index 456659a..98e59f1 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -15,11 +15,11 @@ from sconf import Config def main(config): - max_length = 768 - image_size = [1920, 2560] - config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) - config_vision.encoder.image_size = image_size # (height, width) - config_vision.decoder.max_length = max_length + # max_length = 768 + # image_size = [1920, 2560] + # config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) + # config_vision.encoder.image_size = image_size # (height, width) + # config_vision.decoder.max_length = max_length processor = DonutProcessor.from_pretrained(config.pretrained_processor_path) model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path, config=config) From 53a1db3dce1f29df2fb56b01044d6e19a9d1e205 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:24:47 +0100 Subject: [PATCH 07/17] config fix --- config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config.yaml b/config.yaml index be75a38..0b2a262 100644 --- a/config.yaml +++ b/config.yaml @@ -5,4 +5,4 @@ validation_dataset_split: "train" has_metadata: False print_output: True output_file_dir: "../../gonito-outs" -test_name: "base-test" +test_name: "base-test" \ No newline at end of file From cfcb15e99911a3d74101119ee9e621ac45820b72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:26:21 +0100 Subject: [PATCH 08/17] f --- config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.yaml b/config.yaml index 0b2a262..1ae36ee 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,5 @@ -pretrained_processor_path: "naver-clova-ix/donut-base" -pretrained_model_path: "naver-clova-ix/donut-base" +pretrained_processor_path: "Zombely/plwiki-proto-fine-tuned-v2" +pretrained_model_path: "Zombely/plwiki-proto-fine-tuned-v2" validation_dataset_path: "Zombely/diachronia-ocr" validation_dataset_split: "train" has_metadata: False From 31981cfc515fd27ec2b58218373e60bcb0939349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:27:14 +0100 Subject: [PATCH 09/17] donut eval fix --- donut-eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/donut-eval.py b/donut-eval.py index 98e59f1..bc83f27 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -22,7 +22,7 @@ def main(config): # config_vision.decoder.max_length = max_length processor = DonutProcessor.from_pretrained(config.pretrained_processor_path) - model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path, config=config) + model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path) dataset = load_dataset(config.validation_dataset_path, split=config.validation_dataset_split) device = "cuda" if torch.cuda.is_available() else "cpu" model.eval() From 3cbed8144e2b0ac696877db4edc15e0e56b26afd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:27:56 +0100 Subject: [PATCH 10/17] naver --- config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.yaml b/config.yaml index 1ae36ee..0b2a262 100644 --- a/config.yaml +++ b/config.yaml @@ -1,5 +1,5 @@ -pretrained_processor_path: "Zombely/plwiki-proto-fine-tuned-v2" -pretrained_model_path: "Zombely/plwiki-proto-fine-tuned-v2" +pretrained_processor_path: "naver-clova-ix/donut-base" +pretrained_model_path: "naver-clova-ix/donut-base" validation_dataset_path: "Zombely/diachronia-ocr" validation_dataset_split: "train" has_metadata: False From bdb7f5ef7e3f186101ce6b5028af4419690032e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:29:25 +0100 Subject: [PATCH 11/17] additional config --- donut-eval.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/donut-eval.py b/donut-eval.py index bc83f27..bdf7c93 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -15,14 +15,12 @@ from sconf import Config def main(config): - # max_length = 768 - # image_size = [1920, 2560] - # config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) - # config_vision.encoder.image_size = image_size # (height, width) - # config_vision.decoder.max_length = max_length + config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) + config_vision.encoder.image_size = [1920, 2560] # (height, width) + config_vision.decoder.max_length = 768 processor = DonutProcessor.from_pretrained(config.pretrained_processor_path) - model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path) + model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path, config=config_vision) dataset = load_dataset(config.validation_dataset_path, split=config.validation_dataset_split) device = "cuda" if torch.cuda.is_available() else "cpu" model.eval() From 296647793ea437c375bd76f111e73ddbdb5d8cb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:33:03 +0100 Subject: [PATCH 12/17] added more config to eval --- donut-eval.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/donut-eval.py b/donut-eval.py index bdf7c93..acc102b 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -15,12 +15,17 @@ from sconf import Config def main(config): + image_size = [1920, 2560] config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) - config_vision.encoder.image_size = [1920, 2560] # (height, width) + config_vision.encoder.image_size = image_size # (height, width) config_vision.decoder.max_length = 768 processor = DonutProcessor.from_pretrained(config.pretrained_processor_path) model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path, config=config_vision) + + processor.image_processor.size = image_size[::-1] # should be (width, height) + processor.image_processor.do_align_long_axis = False + dataset = load_dataset(config.validation_dataset_path, split=config.validation_dataset_split) device = "cuda" if torch.cuda.is_available() else "cpu" model.eval() From cc2e6541cf2638e8196230eb36cfa44396e03141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:44:55 +0100 Subject: [PATCH 13/17] test --- donut-eval.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/donut-eval.py b/donut-eval.py index acc102b..5a03d45 100644 --- a/donut-eval.py +++ b/donut-eval.py @@ -15,15 +15,15 @@ from sconf import Config def main(config): - image_size = [1920, 2560] - config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) - config_vision.encoder.image_size = image_size # (height, width) - config_vision.decoder.max_length = 768 + # image_size = [1920, 2560] + # config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path) + # config_vision.encoder.image_size = image_size # (height, width) + # config_vision.decoder.max_length = 768 processor = DonutProcessor.from_pretrained(config.pretrained_processor_path) - model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path, config=config_vision) + model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path) - processor.image_processor.size = image_size[::-1] # should be (width, height) + # processor.image_processor.size = image_size[::-1] # should be (width, height) processor.image_processor.do_align_long_axis = False dataset = load_dataset(config.validation_dataset_path, split=config.validation_dataset_split) From 8cf6a48c552f926f1f806ff5c6dd091e8c43878e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:49:49 +0100 Subject: [PATCH 14/17] proto --- config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config.yaml b/config.yaml index 0b2a262..512a31b 100644 --- a/config.yaml +++ b/config.yaml @@ -1,8 +1,8 @@ -pretrained_processor_path: "naver-clova-ix/donut-base" -pretrained_model_path: "naver-clova-ix/donut-base" +pretrained_processor_path: "naver-clova-ix/donut-proto" +pretrained_model_path: "naver-clova-ix/donut-proto" validation_dataset_path: "Zombely/diachronia-ocr" validation_dataset_split: "train" has_metadata: False print_output: True output_file_dir: "../../gonito-outs" -test_name: "base-test" \ No newline at end of file +test_name: "proto-test" \ No newline at end of file From 45d3ac7355b7c45cc810aced53f1c1a18fa363e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Sat, 17 Dec 2022 10:52:31 +0100 Subject: [PATCH 15/17] fine --- config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config.yaml b/config.yaml index 512a31b..9dc286e 100644 --- a/config.yaml +++ b/config.yaml @@ -1,8 +1,8 @@ -pretrained_processor_path: "naver-clova-ix/donut-proto" -pretrained_model_path: "naver-clova-ix/donut-proto" +pretrained_processor_path: "Zombely/plwiki-proto-fine-tuned-v2" +pretrained_model_path: "Zombely/plwiki-proto-fine-tuned-v2" validation_dataset_path: "Zombely/diachronia-ocr" validation_dataset_split: "train" has_metadata: False print_output: True output_file_dir: "../../gonito-outs" -test_name: "proto-test" \ No newline at end of file +test_name: "fine-tuned-test" \ No newline at end of file From 8006c39d83daf47fa9f6ac97479fcbe6e686a302 Mon Sep 17 00:00:00 2001 From: s444415 Date: Tue, 3 Jan 2023 13:15:05 +0100 Subject: [PATCH 16/17] config edit, dataset create script, donut train config edit --- config.yaml => config-eval.yaml | 0 dataset_create.ipynb | 822 ++++++++++++++++++++++++++++++++ donut-train.py | 12 +- 3 files changed, 828 insertions(+), 6 deletions(-) rename config.yaml => config-eval.yaml (100%) create mode 100644 dataset_create.ipynb diff --git a/config.yaml b/config-eval.yaml similarity index 100% rename from config.yaml rename to config-eval.yaml diff --git a/dataset_create.ipynb b/dataset_create.ipynb new file mode 100644 index 0000000..4fbcd73 --- /dev/null +++ b/dataset_create.ipynb @@ -0,0 +1,822 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from huggingface_hub import login\n", + "from datasets import load_dataset\n", + "import os\n", + "import json\n", + "import shutil" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f0476002f8d14822a24f1376cfe29a07", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value='
Date: Wed, 4 Jan 2023 09:50:56 +0100 Subject: [PATCH 17/17] folder split --- dataset_create.ipynb => notepads/dataset_create.ipynb | 0 donut-eval.ipynb => notepads/donut-eval.ipynb | 0 donut-train.ipynb => notepads/donut-train.ipynb | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename dataset_create.ipynb => notepads/dataset_create.ipynb (100%) rename donut-eval.ipynb => notepads/donut-eval.ipynb (100%) rename donut-train.ipynb => notepads/donut-train.ipynb (100%) diff --git a/dataset_create.ipynb b/notepads/dataset_create.ipynb similarity index 100% rename from dataset_create.ipynb rename to notepads/dataset_create.ipynb diff --git a/donut-eval.ipynb b/notepads/donut-eval.ipynb similarity index 100% rename from donut-eval.ipynb rename to notepads/donut-eval.ipynb diff --git a/donut-train.ipynb b/notepads/donut-train.ipynb similarity index 100% rename from donut-train.ipynb rename to notepads/donut-train.ipynb