change base to large
This commit is contained in:
parent
b9f590cf3f
commit
5412489296
@ -16,7 +16,7 @@
|
||||
"accelerator": "GPU",
|
||||
"widgets": {
|
||||
"application/vnd.jupyter.widget-state+json": {
|
||||
"7cd0b9f562c3497fa8521900361aa1b6": {
|
||||
"4f448c953d694cb1a6f2371044699c53": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_name": "HBoxModel",
|
||||
"model_module_version": "1.5.0",
|
||||
@ -31,14 +31,14 @@
|
||||
"_view_name": "HBoxView",
|
||||
"box_style": "",
|
||||
"children": [
|
||||
"IPY_MODEL_6a752c40c5ed4a848718fc5515ba3ff6",
|
||||
"IPY_MODEL_a3c49c91d53b4a0fbaa06cf9edc53902",
|
||||
"IPY_MODEL_8f42cebfbb2c44b5b65dc403950ab7b6"
|
||||
"IPY_MODEL_a0ebb2e5f804420fa162bcd8d21c1618",
|
||||
"IPY_MODEL_1fa01b1aaf614e8bbb80ea50c1f7e896",
|
||||
"IPY_MODEL_c5ac6f5b66254013961fc4c23fb5b9d0"
|
||||
],
|
||||
"layout": "IPY_MODEL_66fdc95b84a7421f8394ff0ccabfd4ee"
|
||||
"layout": "IPY_MODEL_3df8072a2c5d49768645de6a469cfaac"
|
||||
}
|
||||
},
|
||||
"6a752c40c5ed4a848718fc5515ba3ff6": {
|
||||
"a0ebb2e5f804420fa162bcd8d21c1618": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_name": "HTMLModel",
|
||||
"model_module_version": "1.5.0",
|
||||
@ -53,13 +53,13 @@
|
||||
"_view_name": "HTMLView",
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_bab672d9adef477a8d6db292e5217ae2",
|
||||
"layout": "IPY_MODEL_978d779455244554a0c218f6d35a3c42",
|
||||
"placeholder": "",
|
||||
"style": "IPY_MODEL_bb31a82ad4df4a0bbdcdc7fee5c333f0",
|
||||
"style": "IPY_MODEL_e16ed92825334d56bcba384f6f0c97f8",
|
||||
"value": "Map: 100%"
|
||||
}
|
||||
},
|
||||
"a3c49c91d53b4a0fbaa06cf9edc53902": {
|
||||
"1fa01b1aaf614e8bbb80ea50c1f7e896": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_name": "FloatProgressModel",
|
||||
"model_module_version": "1.5.0",
|
||||
@ -75,15 +75,15 @@
|
||||
"bar_style": "success",
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_44645ba3efc546949f8de88a69109319",
|
||||
"layout": "IPY_MODEL_f7485d8be1a147e4abead66435df3d42",
|
||||
"max": 1024,
|
||||
"min": 0,
|
||||
"orientation": "horizontal",
|
||||
"style": "IPY_MODEL_e2d98d9e52f5402db83ac5290b6b5e66",
|
||||
"style": "IPY_MODEL_10e58cef76084e7085256f88b002e043",
|
||||
"value": 1024
|
||||
}
|
||||
},
|
||||
"8f42cebfbb2c44b5b65dc403950ab7b6": {
|
||||
"c5ac6f5b66254013961fc4c23fb5b9d0": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_name": "HTMLModel",
|
||||
"model_module_version": "1.5.0",
|
||||
@ -98,13 +98,13 @@
|
||||
"_view_name": "HTMLView",
|
||||
"description": "",
|
||||
"description_tooltip": null,
|
||||
"layout": "IPY_MODEL_24cca48948be45779949b96cd63a70c8",
|
||||
"layout": "IPY_MODEL_17ae8ff247d54a8eb5799d5d1b2be5a5",
|
||||
"placeholder": "",
|
||||
"style": "IPY_MODEL_8647bedfaa684f139d3534552a3fb493",
|
||||
"value": " 1024/1024 [00:00<00:00, 1828.40 examples/s]"
|
||||
"style": "IPY_MODEL_71deaf885bf149bc85c529738585965e",
|
||||
"value": " 1024/1024 [00:00<00:00, 3663.19 examples/s]"
|
||||
}
|
||||
},
|
||||
"66fdc95b84a7421f8394ff0ccabfd4ee": {
|
||||
"3df8072a2c5d49768645de6a469cfaac": {
|
||||
"model_module": "@jupyter-widgets/base",
|
||||
"model_name": "LayoutModel",
|
||||
"model_module_version": "1.2.0",
|
||||
@ -156,7 +156,7 @@
|
||||
"width": null
|
||||
}
|
||||
},
|
||||
"bab672d9adef477a8d6db292e5217ae2": {
|
||||
"978d779455244554a0c218f6d35a3c42": {
|
||||
"model_module": "@jupyter-widgets/base",
|
||||
"model_name": "LayoutModel",
|
||||
"model_module_version": "1.2.0",
|
||||
@ -208,7 +208,7 @@
|
||||
"width": null
|
||||
}
|
||||
},
|
||||
"bb31a82ad4df4a0bbdcdc7fee5c333f0": {
|
||||
"e16ed92825334d56bcba384f6f0c97f8": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_name": "DescriptionStyleModel",
|
||||
"model_module_version": "1.5.0",
|
||||
@ -223,7 +223,7 @@
|
||||
"description_width": ""
|
||||
}
|
||||
},
|
||||
"44645ba3efc546949f8de88a69109319": {
|
||||
"f7485d8be1a147e4abead66435df3d42": {
|
||||
"model_module": "@jupyter-widgets/base",
|
||||
"model_name": "LayoutModel",
|
||||
"model_module_version": "1.2.0",
|
||||
@ -275,7 +275,7 @@
|
||||
"width": null
|
||||
}
|
||||
},
|
||||
"e2d98d9e52f5402db83ac5290b6b5e66": {
|
||||
"10e58cef76084e7085256f88b002e043": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_name": "ProgressStyleModel",
|
||||
"model_module_version": "1.5.0",
|
||||
@ -291,7 +291,7 @@
|
||||
"description_width": ""
|
||||
}
|
||||
},
|
||||
"24cca48948be45779949b96cd63a70c8": {
|
||||
"17ae8ff247d54a8eb5799d5d1b2be5a5": {
|
||||
"model_module": "@jupyter-widgets/base",
|
||||
"model_name": "LayoutModel",
|
||||
"model_module_version": "1.2.0",
|
||||
@ -343,7 +343,7 @@
|
||||
"width": null
|
||||
}
|
||||
},
|
||||
"8647bedfaa684f139d3534552a3fb493": {
|
||||
"71deaf885bf149bc85c529738585965e": {
|
||||
"model_module": "@jupyter-widgets/controls",
|
||||
"model_name": "DescriptionStyleModel",
|
||||
"model_module_version": "1.5.0",
|
||||
@ -372,7 +372,7 @@
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"id": "0ju_uJOjZLE3",
|
||||
"outputId": "b3836512-aed0-4e7f-81c8-895d595b589a"
|
||||
"outputId": "d05040a2-b04c-4b88-e616-c88a9c47895a"
|
||||
},
|
||||
"execution_count": 1,
|
||||
"outputs": [
|
||||
@ -459,7 +459,7 @@
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"outputId": "d34d1f53-77af-4e37-d501-f8144ad417e0"
|
||||
"outputId": "47c03859-2c6b-4855-a995-e08e8bcd5140"
|
||||
},
|
||||
"execution_count": 3,
|
||||
"outputs": [
|
||||
@ -480,7 +480,7 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"model_name = \"t5-base\"\n",
|
||||
"model_name = \"t5-large\"\n",
|
||||
"tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length = 128)\n",
|
||||
"\n",
|
||||
"def tokenize_function(examples):\n",
|
||||
@ -495,21 +495,21 @@
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 49,
|
||||
"referenced_widgets": [
|
||||
"7cd0b9f562c3497fa8521900361aa1b6",
|
||||
"6a752c40c5ed4a848718fc5515ba3ff6",
|
||||
"a3c49c91d53b4a0fbaa06cf9edc53902",
|
||||
"8f42cebfbb2c44b5b65dc403950ab7b6",
|
||||
"66fdc95b84a7421f8394ff0ccabfd4ee",
|
||||
"bab672d9adef477a8d6db292e5217ae2",
|
||||
"bb31a82ad4df4a0bbdcdc7fee5c333f0",
|
||||
"44645ba3efc546949f8de88a69109319",
|
||||
"e2d98d9e52f5402db83ac5290b6b5e66",
|
||||
"24cca48948be45779949b96cd63a70c8",
|
||||
"8647bedfaa684f139d3534552a3fb493"
|
||||
"4f448c953d694cb1a6f2371044699c53",
|
||||
"a0ebb2e5f804420fa162bcd8d21c1618",
|
||||
"1fa01b1aaf614e8bbb80ea50c1f7e896",
|
||||
"c5ac6f5b66254013961fc4c23fb5b9d0",
|
||||
"3df8072a2c5d49768645de6a469cfaac",
|
||||
"978d779455244554a0c218f6d35a3c42",
|
||||
"e16ed92825334d56bcba384f6f0c97f8",
|
||||
"f7485d8be1a147e4abead66435df3d42",
|
||||
"10e58cef76084e7085256f88b002e043",
|
||||
"17ae8ff247d54a8eb5799d5d1b2be5a5",
|
||||
"71deaf885bf149bc85c529738585965e"
|
||||
]
|
||||
},
|
||||
"id": "zFaiMEblaTFy",
|
||||
"outputId": "d397d1d4-6b4d-4913-de53-788d2d4c698c"
|
||||
"outputId": "da51b9d9-f4ed-4518-9a4e-d8eeb5267de6"
|
||||
},
|
||||
"execution_count": 4,
|
||||
"outputs": [
|
||||
@ -522,7 +522,7 @@
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"version_major": 2,
|
||||
"version_minor": 0,
|
||||
"model_id": "7cd0b9f562c3497fa8521900361aa1b6"
|
||||
"model_id": "4f448c953d694cb1a6f2371044699c53"
|
||||
}
|
||||
},
|
||||
"metadata": {}
|
||||
@ -540,7 +540,7 @@
|
||||
"colab": {
|
||||
"base_uri": "https://localhost:8080/"
|
||||
},
|
||||
"outputId": "75930083-b0a0-4f4c-840d-35222e898719"
|
||||
"outputId": "f886b927-e3a2-48dd-9c88-200c20f841d9"
|
||||
},
|
||||
"execution_count": 5,
|
||||
"outputs": [
|
||||
@ -548,7 +548,7 @@
|
||||
"output_type": "stream",
|
||||
"name": "stderr",
|
||||
"text": [
|
||||
"Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at t5-base and are newly initialized: ['classification_head.out_proj.bias', 'classification_head.out_proj.weight', 'classification_head.dense.weight', 'classification_head.dense.bias']\n",
|
||||
"Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at t5-large and are newly initialized: ['classification_head.out_proj.weight', 'classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias']\n",
|
||||
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
|
||||
]
|
||||
}
|
||||
@ -572,19 +572,19 @@
|
||||
"metadata": {
|
||||
"id": "hpRwZ0QhrV92"
|
||||
},
|
||||
"execution_count": 44,
|
||||
"execution_count": 6,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"train_arguments = TrainingArguments(num_train_epochs=1, per_device_train_batch_size=32, output_dir=\"./Output\", evaluation_strategy=\"epoch\",)\n",
|
||||
"train_arguments = TrainingArguments(num_train_epochs=1, per_device_train_batch_size=8, output_dir=\"./Output\", evaluation_strategy=\"epoch\",)\n",
|
||||
"trainer=Trainer(model=model, args=train_arguments, train_dataset=tokenized_dataset_train, eval_dataset=tokenized_dataset_eval, compute_metrics=compute_metrics)"
|
||||
],
|
||||
"metadata": {
|
||||
"id": "P4hpYtvHbRex"
|
||||
},
|
||||
"execution_count": 45,
|
||||
"execution_count": 11,
|
||||
"outputs": []
|
||||
},
|
||||
{
|
||||
@ -598,9 +598,9 @@
|
||||
"base_uri": "https://localhost:8080/",
|
||||
"height": 141
|
||||
},
|
||||
"outputId": "ed0f490c-f168-4382-8211-c05ee12be011"
|
||||
"outputId": "7ec665e1-1db5-49ee-a482-13ca53836418"
|
||||
},
|
||||
"execution_count": 46,
|
||||
"execution_count": 12,
|
||||
"outputs": [
|
||||
{
|
||||
"output_type": "display_data",
|
||||
@ -612,8 +612,8 @@
|
||||
"\n",
|
||||
" <div>\n",
|
||||
" \n",
|
||||
" <progress value='158' max='158' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
||||
" [158/158 04:34, Epoch 1/1]\n",
|
||||
" <progress value='632' max='632' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
|
||||
" [632/632 18:44, Epoch 1/1]\n",
|
||||
" </div>\n",
|
||||
" <table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
@ -627,9 +627,9 @@
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>No log</td>\n",
|
||||
" <td>1.616516</td>\n",
|
||||
" <td>0.888672</td>\n",
|
||||
" <td>0.453600</td>\n",
|
||||
" <td>0.352671</td>\n",
|
||||
" <td>0.904297</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table><p>"
|
||||
@ -641,11 +641,11 @@
|
||||
"output_type": "execute_result",
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"TrainOutput(global_step=158, training_loss=0.03489681135250043, metrics={'train_runtime': 276.2183, 'train_samples_per_second': 18.29, 'train_steps_per_second': 0.572, 'total_flos': 771417209622528.0, 'train_loss': 0.03489681135250043, 'epoch': 1.0})"
|
||||
"TrainOutput(global_step=632, training_loss=0.4112016822718367, metrics={'train_runtime': 1126.6685, 'train_samples_per_second': 4.484, 'train_steps_per_second': 0.561, 'total_flos': 2738543023411200.0, 'train_loss': 0.4112016822718367, 'epoch': 1.0})"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"execution_count": 46
|
||||
"execution_count": 12
|
||||
}
|
||||
]
|
||||
}
|
Loading…
Reference in New Issue
Block a user