From 8006c39d83daf47fa9f6ac97479fcbe6e686a302 Mon Sep 17 00:00:00 2001 From: s444415 Date: Tue, 3 Jan 2023 13:15:05 +0100 Subject: [PATCH] config edit, dataset create script, donut train config edit --- config.yaml => config-eval.yaml | 0 dataset_create.ipynb | 822 ++++++++++++++++++++++++++++++++ donut-train.py | 12 +- 3 files changed, 828 insertions(+), 6 deletions(-) rename config.yaml => config-eval.yaml (100%) create mode 100644 dataset_create.ipynb diff --git a/config.yaml b/config-eval.yaml similarity index 100% rename from config.yaml rename to config-eval.yaml diff --git a/dataset_create.ipynb b/dataset_create.ipynb new file mode 100644 index 0000000..4fbcd73 --- /dev/null +++ b/dataset_create.ipynb @@ -0,0 +1,822 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from huggingface_hub import login\n", + "from datasets import load_dataset\n", + "import os\n", + "import json\n", + "import shutil" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f0476002f8d14822a24f1376cfe29a07", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "VBox(children=(HTML(value='