Merge branch 'vm-changes'

2023-01-04 09:52:13 +01:00 · 2023-01-04 09:52:13 +01:00 · ccd4090d4b
commit ccd4090d4b
parent d52050336f 0de83d1603
6 changed files with 906 additions and 69 deletions
--- a/config-eval.yaml
+++ b/config-eval.yaml
@ -0,0 +1,8 @@
+pretrained_processor_path: "Zombely/plwiki-proto-fine-tuned-v2"
+pretrained_model_path: "Zombely/plwiki-proto-fine-tuned-v2"
+validation_dataset_path: "Zombely/diachronia-ocr"
+validation_dataset_split: "train"
+has_metadata: False
+print_output: True
+output_file_dir: "../../gonito-outs"
+test_name: "fine-tuned-test"
--- a/donut-eval.py
+++ b/donut-eval.py
@ -1,83 +1,90 @@
 #!/usr/bin/env python
 # coding: utf-8

-# In[1]:
-
-
-from transformers import DonutProcessor, VisionEncoderDecoderModel
+from transformers import DonutProcessor, VisionEncoderDecoderModel, VisionEncoderDecoderConfig
 from datasets import load_dataset
 import re
 import json
 import torch
 from tqdm.auto import tqdm
 import numpy as np
-
+import pandas as pd
 from donut import JSONParseEvaluator
+import argparse
+from sconf import Config

+def main(config):

-# In[2]:
+    # image_size = [1920, 2560]
+    # config_vision = VisionEncoderDecoderConfig.from_pretrained(config.pretrained_model_path)
+    # config_vision.encoder.image_size = image_size # (height, width)
+    # config_vision.decoder.max_length = 768

+    processor = DonutProcessor.from_pretrained(config.pretrained_processor_path)
+    model = VisionEncoderDecoderModel.from_pretrained(config.pretrained_model_path)

-processor = DonutProcessor.from_pretrained("Zombely/plwiki-proto-fine-tuned")
-model = VisionEncoderDecoderModel.from_pretrained("Zombely/plwiki-proto-fine-tuned")
+    # processor.image_processor.size = image_size[::-1] # should be (width, height)
+    processor.image_processor.do_align_long_axis = False

+    dataset = load_dataset(config.validation_dataset_path, split=config.validation_dataset_split)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.eval()
+    model.to(device)
+    output_list = []
+    accs = []

-# In[3]:
+    for idx, sample in tqdm(enumerate(dataset), total=len(dataset)):
+        # prepare encoder inputs
+        pixel_values = processor(sample['image'].convert("RGB"), return_tensors="pt").pixel_values
+        pixel_values = pixel_values.to(device)
+        # prepare decoder inputs
+        task_prompt = "<s_cord-v2>"
+        decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
+        decoder_input_ids = decoder_input_ids.to(device)
+        
+        # autoregressively generate sequence
+        outputs = model.generate(
+                pixel_values,
+                decoder_input_ids=decoder_input_ids,
+                max_length=model.decoder.config.max_position_embeddings,
+                early_stopping=True,
+                pad_token_id=processor.tokenizer.pad_token_id,
+                eos_token_id=processor.tokenizer.eos_token_id,
+                use_cache=True,
+                num_beams=1,
+                bad_words_ids=[[processor.tokenizer.unk_token_id]],
+                return_dict_in_generate=True,
+            )

+        # turn into JSON
+        seq = processor.batch_decode(outputs.sequences)[0]
+        seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
+        seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token
+        seq = processor.token2json(seq)
+        if config.has_metadata:
+            ground_truth = json.loads(sample["ground_truth"])
+            ground_truth = ground_truth["gt_parse"]
+            evaluator = JSONParseEvaluator()
+            score = evaluator.cal_acc(seq, ground_truth)

-dataset = load_dataset("Zombely/pl-text-images-5000-whole", split="validation")
+            accs.append(score)
+        if config.print_output:
+            print(seq)
+        output_list.append(seq)
+    if config.output_file_dir:
+        df = pd.DataFrame(map(lambda x: x.get('text_sequence', ''), output_list))
+        df.to_csv(f'{config.output_file_dir}/{config.test_name}-out.tsv', sep='\t', header=False, index=False)

+    if config.has_metadata:
+        scores = {"accuracies": accs, "mean_accuracy": np.mean(accs)}
+        print(scores, f"length : {len(accs)}")
+        print("Mean accuracy:", np.mean(accs))

-# In[4]:
-
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-model.eval()
-model.to(device)
-
-output_list = []
-accs = []
-
-
-for idx, sample in tqdm(enumerate(dataset), total=len(dataset)):
-    # prepare encoder inputs
-    pixel_values = processor(sample["image"].convert("RGB"), return_tensors="pt").pixel_values
-    pixel_values = pixel_values.to(device)
-    # prepare decoder inputs
-    task_prompt = "<s_cord-v2>"
-    decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
-    decoder_input_ids = decoder_input_ids.to(device)
-    
-    # autoregressively generate sequence
-    outputs = model.generate(
-            pixel_values,
-            decoder_input_ids=decoder_input_ids,
-            max_length=model.decoder.config.max_position_embeddings,
-            early_stopping=True,
-            pad_token_id=processor.tokenizer.pad_token_id,
-            eos_token_id=processor.tokenizer.eos_token_id,
-            use_cache=True,
-            num_beams=1,
-            bad_words_ids=[[processor.tokenizer.unk_token_id]],
-            return_dict_in_generate=True,
-        )
-
-    # turn into JSON
-    seq = processor.batch_decode(outputs.sequences)[0]
-    seq = seq.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
-    seq = re.sub(r"<.*?>", "", seq, count=1).strip()  # remove first task start token
-    seq = processor.token2json(seq)
-
-    ground_truth = json.loads(sample["ground_truth"])
-    ground_truth = ground_truth["gt_parse"]
-    evaluator = JSONParseEvaluator()
-    score = evaluator.cal_acc(seq, ground_truth)
-
-    accs.append(score)
-    output_list.append(seq)
-
-scores = {"accuracies": accs, "mean_accuracy": np.mean(accs)}
-print(scores, f"length : {len(accs)}")
-print("Mean accuracy:", np.mean(accs))
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config", type=str, required=True)
+    args, left_argv = parser.parse_known_args()
+    config = Config(args.config)
+    config.argv_update(left_argv)

+    main(config)
--- a/donut-train.py
+++ b/donut-train.py
@ -21,16 +21,16 @@ from pytorch_lightning.plugins import CheckpointIO



-DATASET_PATH = "Zombely/pl-text-images-5000-whole"
-PRETRAINED_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned"
-START_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned"
-OUTPUT_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned-v2"
-LOGGING_PATH = "plwiki-proto-ft-second-iter"
+DATASET_PATH = "Zombely/fiszki-ocr-train"
+PRETRAINED_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned-v2"
+START_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned-v2"
+OUTPUT_MODEL_PATH = "Zombely/plwiki-proto-fine-tuned-v3"
+LOGGING_PATH = "fiszki-ocr-fine-tune"
 CHECKPOINT_PATH = "./checkpoint"


 train_config = {
-    "max_epochs":30,
+    "max_epochs":1,
    "val_check_interval":0.5, # how many times we want to validate during an epoch
    "check_val_every_n_epoch":1,
    "gradient_clip_val":1.0,
@ -362,7 +362,7 @@ custom_ckpt = CustomCheckpointIO()


 trainer = pl.Trainer(
-        accelerator="gpu", # change to gpu
+        accelerator="gpu" if torch.cuda.is_available() else 'cpu', # use the GPU when available, otherwise fall back to CPU
        devices=1,
        max_epochs=train_config.get("max_epochs"),
        val_check_interval=train_config.get("val_check_interval"),
--- a/notepads/dataset_create.ipynb
+++ b/notepads/dataset_create.ipynb
@ -0,0 +1,822 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "from huggingface_hub import login\n",
+    "from datasets import load_dataset\n",
+    "import os\n",
+    "import json\n",
+    "import shutil"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f0476002f8d14822a24f1376cfe29a07",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "login(os.environ.get(\"HUG_TOKKEN\"))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_train = pd.read_csv('../fiszki-ocr/train/in.tsv', sep='\\t', header=None, index_col=False)\n",
+    "files = [file[0] for file in df_train.iloc()]\n",
+    "df_train_out = pd.read_csv('../fiszki-ocr/train/expected.tsv', sep='\\t', header=None, index_col=False)\n",
+    "files_out = [file_out[0] for file_out in df_train_out.iloc()]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "whole = []\n",
+    "for file, out in zip(files, files_out):\n",
+    "        whole.append({\"file_name\": file, \"ground_truth\": json.dumps({\"gt_parse\": {\"text_sequance\": out}}, ensure_ascii=False)})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train = whole[:85]\n",
+    "validation = whole[85:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_files = [file.get(\"file_name\") for file in train]\n",
+    "validation_files = [file.get(\"file_name\") for file in validation]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for image in os.listdir(\"../fiszki-ocr/images\"):\n",
+    "    if image in train_files:\n",
+    "        shutil.copy(f\"/home/pc/work/fiszki-ocr/images/{image}\", f\"./images-split-fiszki/train/{image}\")\n",
+    "    if image in validation_files:\n",
+    "        shutil.copy(f\"/home/pc/work/fiszki-ocr/images/{image}\", f\"./images-split-fiszki/validation/{image}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "with open('./images-split-fiszki/train/metadata.jsonl', 'w', encoding='utf-8') as f:\n",
+    "    for entry in train:\n",
+    "        json.dump(entry, f, ensure_ascii=False)\n",
+    "        f.write(\"\\n\")\n",
+    "with open('./images-split-fiszki/validation/metadata.jsonl', 'w', encoding='utf-8') as f:\n",
+    "    for entry in validation:\n",
+    "        json.dump(entry, f, ensure_ascii=False)\n",
+    "        f.write(\"\\n\")\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ca154573c11a44a8a1fa7dede4c54e26",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Resolving data files:   0%|          | 0/86 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Using custom data configuration images-split-fiszki-0b6e02834f7867a1\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Downloading and preparing dataset imagefolder/images-split-fiszki to /home/pc/.cache/huggingface/datasets/imagefolder/images-split-fiszki-0b6e02834f7867a1/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f...\n",
+      "                "
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2677f9a18a4d40768ebfee41eb5ee208",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #4:   0%|          | 0/6 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b742285b54724ef895dc3f1c76510030",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #1:   0%|          | 0/6 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "27b3c6bbb7fe4220b20a13c6b720b99e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #15:   0%|          | 0/5 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b9961292c96c404582fe522ff8d93e1d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #10:   0%|          | 0/5 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d496ade67a244136b1fe5a00e539dc9f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #6:   0%|          | 0/6 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "61b0ebdef7814d0ab6f9fa796b67f033",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #14:   0%|          | 0/5 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9040a6b8a24f4ab793d0cf459f5f35b3",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #2:   0%|          | 0/6 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e4e6f1800d37456ebc095f7a096082fe",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #7:   0%|          | 0/6 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "badee192d70a4d109cf38b3539876221",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #8:   0%|          | 0/5 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "05801dc38fd24f4382f488c8a3fa92bc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #3:   0%|          | 0/6 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "704b4bd67b044e9c8d3cb009df4be325",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #0:   0%|          | 0/6 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ee591babc11e479c8263368893964589",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #9:   0%|          | 0/5 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2e6e2f9a00774a6ba35330a0e1104968",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #5:   0%|          | 0/6 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c97fd7b70b544c068d13eef90ad05127",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #11:   0%|          | 0/5 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9d9741e14c7945c4aac512ebe6effbba",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #12:   0%|          | 0/5 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f0ec07904f434cf7b8d7e98702979c83",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #13:   0%|          | 0/5 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3881282964584fe8906257ca4edb825b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files: 0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3d543609550c438c891b36e2406cb1ae",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Extracting data files: 0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                "
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d48e39b33fb74375894bff21bd91dd56",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #2:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3e8add0aead64b06b4b630a9e3cd7614",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #4:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7e3311255c414944965ac6d19e3520bb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #3:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "89fe0a3793d0442ab9d91a98e39b05f1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #0:   0%|          | 0/2 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e89ba8660b684c028d15b5b62f22c3ba",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #5:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "db6d55a219704ffa8f73a31d928fe47e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #1:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ca45c0d6589d4b858a9914ef9f8845d4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #9:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "45ea9e09950d4ec0b0529db382b14d6f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #10:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "cfeb70d806d344b683aa9e772b468e6e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #14:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3dd158c57c8c46b19b86ddd7e31915fd",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #15:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "367863a5314d494f929aae0ca91e0a33",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #7:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a1a0e3b6a0234188b34c11fae2f6503d",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #6:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d8e9df2e9ace4365b3e6faf80c2b7cbb",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #11:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6df8b58b7a934f8eaf0422ce9f704d38",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #13:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d1f58d8da7f24d6394e2c2ace1372d92",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #8:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "208c67f7ffb64f548726bbe2443f6930",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files #12:   0%|          | 0/1 [00:00<?, ?obj/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "905e84687479471daaadfd9850c52a88",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading data files: 0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "3db88be1336b4ea0b03638761a6d69e7",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Extracting data files: 0it [00:00, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "96090b6f92eb46be8b44dde7d96f225a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating train split: 0 examples [00:00, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "419401799c864422b9669c66c44159bd",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating validation split: 0 examples [00:00, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset imagefolder downloaded and prepared to /home/pc/.cache/huggingface/datasets/imagefolder/images-split-fiszki-0b6e02834f7867a1/0.0.0/37fbb85cc714a338bea574ac6c7d0b5be5aff46c1862c1989b20e0771199e93f. Subsequent calls will reuse this data.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "835ebdd301dc469dbb0ad6f1838403a5",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "dataset = load_dataset('./images-split-fiszki')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Pushing split train to the Hub.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a92c7f7732054b479a26b1f32621cf20",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?ba/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d934bd92d4af41e492ecac230b635903",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Pushing split validation to the Hub.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dd0c40c587e84870ad9c3089d401b80a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?ba/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "eeac023e001349a48133da21f7656378",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "dataset.push_to_hub(\"Zombely/fiszki-ocr-train\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hug_donut",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.15 (main, Nov  4 2022, 16:13:54) \n[GCC 11.2.0]"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "8f1c1b41577d000ca6512e75d22d324bbd1d5e060e99f4f49d98cf0adf636690"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/notepads/donut-eval.ipynb
+++ b/notepads/donut-eval.ipynb
--- a/notepads/donut-train.ipynb
+++ b/notepads/donut-train.ipynb