From 54124892969ee2ae4eed0a11b36ceaa016b8966f Mon Sep 17 00:00:00 2001 From: s487194 Date: Thu, 1 Feb 2024 03:10:09 +0100 Subject: [PATCH] change base to large --- ...er.ipynb => T5_encoder_decoder-large.ipynb | 104 +++++++++--------- 1 file changed, 52 insertions(+), 52 deletions(-) rename T5_encoder_decoder.ipynb => T5_encoder_decoder-large.ipynb (88%) diff --git a/T5_encoder_decoder.ipynb b/T5_encoder_decoder-large.ipynb similarity index 88% rename from T5_encoder_decoder.ipynb rename to T5_encoder_decoder-large.ipynb index 91312cd..fcef64e 100644 --- a/T5_encoder_decoder.ipynb +++ b/T5_encoder_decoder-large.ipynb @@ -16,7 +16,7 @@ "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { - "7cd0b9f562c3497fa8521900361aa1b6": { + "4f448c953d694cb1a6f2371044699c53": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", @@ -31,14 +31,14 @@ "_view_name": "HBoxView", "box_style": "", "children": [ - "IPY_MODEL_6a752c40c5ed4a848718fc5515ba3ff6", - "IPY_MODEL_a3c49c91d53b4a0fbaa06cf9edc53902", - "IPY_MODEL_8f42cebfbb2c44b5b65dc403950ab7b6" + "IPY_MODEL_a0ebb2e5f804420fa162bcd8d21c1618", + "IPY_MODEL_1fa01b1aaf614e8bbb80ea50c1f7e896", + "IPY_MODEL_c5ac6f5b66254013961fc4c23fb5b9d0" ], - "layout": "IPY_MODEL_66fdc95b84a7421f8394ff0ccabfd4ee" + "layout": "IPY_MODEL_3df8072a2c5d49768645de6a469cfaac" } }, - "6a752c40c5ed4a848718fc5515ba3ff6": { + "a0ebb2e5f804420fa162bcd8d21c1618": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -53,13 +53,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_bab672d9adef477a8d6db292e5217ae2", + "layout": "IPY_MODEL_978d779455244554a0c218f6d35a3c42", "placeholder": "​", - "style": "IPY_MODEL_bb31a82ad4df4a0bbdcdc7fee5c333f0", + "style": "IPY_MODEL_e16ed92825334d56bcba384f6f0c97f8", "value": "Map: 100%" } }, - "a3c49c91d53b4a0fbaa06cf9edc53902": { + "1fa01b1aaf614e8bbb80ea50c1f7e896": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", @@ -75,15 +75,15 @@ "bar_style": "success", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_44645ba3efc546949f8de88a69109319", + "layout": "IPY_MODEL_f7485d8be1a147e4abead66435df3d42", "max": 1024, "min": 0, "orientation": "horizontal", - "style": "IPY_MODEL_e2d98d9e52f5402db83ac5290b6b5e66", + "style": "IPY_MODEL_10e58cef76084e7085256f88b002e043", "value": 1024 } }, - "8f42cebfbb2c44b5b65dc403950ab7b6": { + "c5ac6f5b66254013961fc4c23fb5b9d0": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", @@ -98,13 +98,13 @@ "_view_name": "HTMLView", "description": "", "description_tooltip": null, - "layout": "IPY_MODEL_24cca48948be45779949b96cd63a70c8", + "layout": "IPY_MODEL_17ae8ff247d54a8eb5799d5d1b2be5a5", "placeholder": "​", - "style": "IPY_MODEL_8647bedfaa684f139d3534552a3fb493", - "value": " 1024/1024 [00:00<00:00, 1828.40 examples/s]" + "style": "IPY_MODEL_71deaf885bf149bc85c529738585965e", + "value": " 1024/1024 [00:00<00:00, 3663.19 examples/s]" } }, - "66fdc95b84a7421f8394ff0ccabfd4ee": { + "3df8072a2c5d49768645de6a469cfaac": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -156,7 +156,7 @@ "width": null } }, - "bab672d9adef477a8d6db292e5217ae2": { + "978d779455244554a0c218f6d35a3c42": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -208,7 +208,7 @@ "width": null } }, - "bb31a82ad4df4a0bbdcdc7fee5c333f0": { + "e16ed92825334d56bcba384f6f0c97f8": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -223,7 +223,7 @@ "description_width": "" } }, - "44645ba3efc546949f8de88a69109319": { + "f7485d8be1a147e4abead66435df3d42": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -275,7 +275,7 @@ "width": null } }, - "e2d98d9e52f5402db83ac5290b6b5e66": { + "10e58cef76084e7085256f88b002e043": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", @@ -291,7 +291,7 @@ "description_width": "" } }, - "24cca48948be45779949b96cd63a70c8": { + "17ae8ff247d54a8eb5799d5d1b2be5a5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", @@ -343,7 +343,7 @@ "width": null } }, - "8647bedfaa684f139d3534552a3fb493": { + "71deaf885bf149bc85c529738585965e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", @@ -372,7 +372,7 @@ "base_uri": "https://localhost:8080/" }, "id": "0ju_uJOjZLE3", - "outputId": "b3836512-aed0-4e7f-81c8-895d595b589a" + "outputId": "d05040a2-b04c-4b88-e616-c88a9c47895a" }, "execution_count": 1, "outputs": [ @@ -459,7 +459,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "d34d1f53-77af-4e37-d501-f8144ad417e0" + "outputId": "47c03859-2c6b-4855-a995-e08e8bcd5140" }, "execution_count": 3, "outputs": [ @@ -480,7 +480,7 @@ { "cell_type": "code", "source": [ - "model_name = \"t5-base\"\n", + "model_name = \"t5-large\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 128)\n", "\n", "def tokenize_function(examples):\n", @@ -495,21 +495,21 @@ "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ - "7cd0b9f562c3497fa8521900361aa1b6", - "6a752c40c5ed4a848718fc5515ba3ff6", - "a3c49c91d53b4a0fbaa06cf9edc53902", - "8f42cebfbb2c44b5b65dc403950ab7b6", - "66fdc95b84a7421f8394ff0ccabfd4ee", - "bab672d9adef477a8d6db292e5217ae2", - "bb31a82ad4df4a0bbdcdc7fee5c333f0", - "44645ba3efc546949f8de88a69109319", - "e2d98d9e52f5402db83ac5290b6b5e66", - "24cca48948be45779949b96cd63a70c8", - "8647bedfaa684f139d3534552a3fb493" + "4f448c953d694cb1a6f2371044699c53", + "a0ebb2e5f804420fa162bcd8d21c1618", + "1fa01b1aaf614e8bbb80ea50c1f7e896", + "c5ac6f5b66254013961fc4c23fb5b9d0", + "3df8072a2c5d49768645de6a469cfaac", + "978d779455244554a0c218f6d35a3c42", + "e16ed92825334d56bcba384f6f0c97f8", + "f7485d8be1a147e4abead66435df3d42", + "10e58cef76084e7085256f88b002e043", + "17ae8ff247d54a8eb5799d5d1b2be5a5", + "71deaf885bf149bc85c529738585965e" ] }, "id": "zFaiMEblaTFy", - "outputId": "d397d1d4-6b4d-4913-de53-788d2d4c698c" + "outputId": "da51b9d9-f4ed-4518-9a4e-d8eeb5267de6" }, "execution_count": 4, "outputs": [ @@ -522,7 +522,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "7cd0b9f562c3497fa8521900361aa1b6" + "model_id": "4f448c953d694cb1a6f2371044699c53" } }, "metadata": {} @@ -540,7 +540,7 @@ "colab": { "base_uri": "https://localhost:8080/" }, - "outputId": "75930083-b0a0-4f4c-840d-35222e898719" + "outputId": "f886b927-e3a2-48dd-9c88-200c20f841d9" }, "execution_count": 5, "outputs": [ @@ -548,7 +548,7 @@ "output_type": "stream", "name": "stderr", "text": [ - "Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at t5-base and are newly initialized: ['classification_head.out_proj.bias', 'classification_head.out_proj.weight', 'classification_head.dense.weight', 'classification_head.dense.bias']\n", + "Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at t5-large and are newly initialized: ['classification_head.out_proj.weight', 'classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } @@ -572,19 +572,19 @@ "metadata": { "id": "hpRwZ0QhrV92" }, - "execution_count": 44, + "execution_count": 6, "outputs": [] }, { "cell_type": "code", "source": [ - "train_arguments = TrainingArguments(num_train_epochs=1, per_device_train_batch_size=32, output_dir=\"./Output\", evaluation_strategy=\"epoch\",)\n", + "train_arguments = TrainingArguments(num_train_epochs=1, per_device_train_batch_size=8, output_dir=\"./Output\", evaluation_strategy=\"epoch\",)\n", "trainer=Trainer(model=model, args=train_arguments, train_dataset=tokenized_dataset_train, eval_dataset=tokenized_dataset_eval, compute_metrics=compute_metrics)" ], "metadata": { "id": "P4hpYtvHbRex" }, - "execution_count": 45, + "execution_count": 11, "outputs": [] }, { @@ -598,9 +598,9 @@ "base_uri": "https://localhost:8080/", "height": 141 }, - "outputId": "ed0f490c-f168-4382-8211-c05ee12be011" + "outputId": "7ec665e1-1db5-49ee-a482-13ca53836418" }, - "execution_count": 46, + "execution_count": 12, "outputs": [ { "output_type": "display_data", @@ -612,8 +612,8 @@ "\n", "
\n", " \n", - " \n", - " [158/158 04:34, Epoch 1/1]\n", + " \n", + " [632/632 18:44, Epoch 1/1]\n", "
\n", " \n", " \n", @@ -627,9 +627,9 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", "
1No log1.6165160.8886720.4536000.3526710.904297

" @@ -641,11 +641,11 @@ "output_type": "execute_result", "data": { "text/plain": [ - "TrainOutput(global_step=158, training_loss=0.03489681135250043, metrics={'train_runtime': 276.2183, 'train_samples_per_second': 18.29, 'train_steps_per_second': 0.572, 'total_flos': 771417209622528.0, 'train_loss': 0.03489681135250043, 'epoch': 1.0})" + "TrainOutput(global_step=632, training_loss=0.4112016822718367, metrics={'train_runtime': 1126.6685, 'train_samples_per_second': 4.484, 'train_steps_per_second': 0.561, 'total_flos': 2738543023411200.0, 'train_loss': 0.4112016822718367, 'epoch': 1.0})" ] }, "metadata": {}, - "execution_count": 46 + "execution_count": 12 } ] }