From dda04a1d5cf9f04144e57976bd5a0300b2c9688f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Koz=C5=82owski?= Date: Wed, 25 Jan 2023 22:18:39 +0100 Subject: [PATCH] 80% split --- train_stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train_stream.py b/train_stream.py index 95c8fd7..561828d 100644 --- a/train_stream.py +++ b/train_stream.py @@ -34,7 +34,7 @@ def main(config, hug_token): added_tokens = [] - dataset = load_dataset(config.dataset_path) + dataset = load_dataset(config.dataset_path, split="train[:80%]") dataset = dataset.train_test_split(test_size=0.1) train_dataset = DonutDataset(