5608 lines
319 KiB
Plaintext
5608 lines
319 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# GPT2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"metadata": {
|
||
|
"pycharm": {
|
||
|
"is_executing": true
|
||
|
}
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 00:13:42 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:13:43 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/16/2022 00:13:43 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1491.40it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\684fe667923972fb57f6b4dcb61a3c92763ad89882f3da5da9866baf14f2d60f.c7ed1f96aac49e745788faa77ba0a26a392643a50bb388b9c04ff469e555241f\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\c0c761a63004025aeadd530c4c27b860ec4ecbe8a00531233de21d865a402598.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:13:48 - INFO - __main__ - Return hidden states from model: False\n",
|
||
|
"02/16/2022 00:13:48 - INFO - __main__ - Using implementation from: AutoModelForSequenceClassification\n",
|
||
|
"loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n",
|
||
|
"All model checkpoint weights were used when initializing GPT2ForSequenceClassification.\n",
|
||
|
"\n",
|
||
|
"Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"Using pad_token, but it is not set yet.\n",
|
||
|
"02/16/2022 00:13:50 - INFO - __main__ - Set PAD token to EOS: <|endoftext|>\n",
|
||
|
"02/16/2022 00:13:50 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-18c6f53370629db4.arrow\n",
|
||
|
"02/16/2022 00:13:50 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-da48038acf63cb08.arrow\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 55.70ba/s]\n",
|
||
|
"02/16/2022 00:13:50 - INFO - __main__ - Sample 2755 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [2435, 284, 651, 3772, 0, 340, 338, 264, 3658, 6184, 108, 126, 253, 126, 240, 126, 246, 220, 220, 220, 1303, 82, 3658, 1303, 10464, 437, 220], 'labels': 0}.\n",
|
||
|
"02/16/2022 00:13:50 - INFO - __main__ - Sample 2054 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [31, 7220, 2488, 7220, 220, 909, 1689, 1222, 696, 26, 8406, 268, 389, 262, 749, 1303, 17096, 11186, 220, 1893, 1222, 696, 26, 410, 79, 287, 2106, 13, 1303, 40954], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:13:50 - INFO - __main__ - Sample 551 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [31, 7220, 523, 318, 340, 572, 605, 326, 1303, 73, 15515, 389, 8720, 220, 287, 262, 2951, 286, 262, 1303, 8019, 83, 446, 14568, 30, 220], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:13:51 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/16/2022 00:13:51 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/16/2022 00:13:51 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/16/2022 00:13:51 - INFO - __main__ - Instantaneous batch size per device = 24\n",
|
||
|
"02/16/2022 00:13:51 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 24\n",
|
||
|
"02/16/2022 00:13:51 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/16/2022 00:13:51 - INFO - __main__ - Total optimization steps = 198\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/198 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/198 [00:03<10:40, 3.25s/it]\n",
|
||
|
" 1%|1 | 2/198 [00:06<10:12, 3.12s/it]\n",
|
||
|
" 2%|1 | 3/198 [00:10<11:42, 3.60s/it]\n",
|
||
|
" 2%|2 | 4/198 [00:13<10:52, 3.37s/it]\n",
|
||
|
" 3%|2 | 5/198 [00:17<11:54, 3.70s/it]\n",
|
||
|
" 3%|3 | 6/198 [00:25<16:09, 5.05s/it]\n",
|
||
|
" 4%|3 | 7/198 [00:30<16:15, 5.11s/it]\n",
|
||
|
" 4%|4 | 8/198 [00:37<18:02, 5.70s/it]\n",
|
||
|
" 5%|4 | 9/198 [00:42<17:15, 5.48s/it]\n",
|
||
|
" 5%|5 | 10/198 [00:48<17:15, 5.51s/it]\n",
|
||
|
" 6%|5 | 11/198 [00:51<15:20, 4.92s/it]\n",
|
||
|
" 6%|6 | 12/198 [00:55<13:50, 4.47s/it]\n",
|
||
|
" 7%|6 | 13/198 [01:00<14:10, 4.60s/it]\n",
|
||
|
" 7%|7 | 14/198 [01:02<12:23, 4.04s/it]\n",
|
||
|
" 8%|7 | 15/198 [01:10<15:20, 5.03s/it]\n",
|
||
|
" 8%|8 | 16/198 [01:14<14:41, 4.84s/it]\n",
|
||
|
" 9%|8 | 17/198 [01:18<13:32, 4.49s/it]\n",
|
||
|
" 9%|9 | 18/198 [01:21<12:04, 4.03s/it]\n",
|
||
|
" 10%|9 | 19/198 [01:24<11:16, 3.78s/it]\n",
|
||
|
" 10%|# | 20/198 [01:27<10:43, 3.61s/it]\n",
|
||
|
" 11%|# | 21/198 [01:31<11:11, 3.79s/it]\n",
|
||
|
" 11%|#1 | 22/198 [01:35<10:44, 3.66s/it]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" 12%|#1 | 23/198 [01:40<11:44, 4.02s/it]\n",
|
||
|
" 12%|#2 | 24/198 [01:44<11:37, 4.01s/it]\n",
|
||
|
" 13%|#2 | 25/198 [01:47<10:46, 3.74s/it]\n",
|
||
|
" 13%|#3 | 26/198 [01:51<11:16, 3.93s/it]\n",
|
||
|
" 14%|#3 | 27/198 [01:55<11:30, 4.04s/it]\n",
|
||
|
" 14%|#4 | 28/198 [02:00<11:42, 4.13s/it]\n",
|
||
|
" 15%|#4 | 29/198 [02:03<10:55, 3.88s/it]\n",
|
||
|
" 15%|#5 | 30/198 [02:07<10:44, 3.84s/it]\n",
|
||
|
" 16%|#5 | 31/198 [02:10<10:06, 3.63s/it]\n",
|
||
|
" 16%|#6 | 32/198 [02:13<10:00, 3.62s/it]\n",
|
||
|
" 17%|#6 | 33/198 [02:17<09:35, 3.49s/it]\n",
|
||
|
" 17%|#7 | 34/198 [02:21<10:11, 3.73s/it]\n",
|
||
|
" 18%|#7 | 35/198 [02:25<10:17, 3.79s/it]\n",
|
||
|
" 18%|#8 | 36/198 [02:28<09:29, 3.51s/it]\n",
|
||
|
" 19%|#8 | 37/198 [02:33<11:12, 4.18s/it]\n",
|
||
|
" 19%|#9 | 38/198 [02:36<10:13, 3.84s/it]\n",
|
||
|
" 20%|#9 | 39/198 [02:40<10:02, 3.79s/it]\n",
|
||
|
" 20%|## | 40/198 [02:44<10:18, 3.92s/it]\n",
|
||
|
" 21%|## | 41/198 [02:48<09:38, 3.68s/it]\n",
|
||
|
" 21%|##1 | 42/198 [02:52<10:11, 3.92s/it]\n",
|
||
|
" 22%|##1 | 43/198 [02:58<11:44, 4.55s/it]\n",
|
||
|
" 22%|##2 | 44/198 [03:02<11:02, 4.30s/it]\n",
|
||
|
" 23%|##2 | 45/198 [03:06<11:16, 4.42s/it]\n",
|
||
|
" 23%|##3 | 46/198 [03:09<10:02, 3.96s/it]\n",
|
||
|
" 24%|##3 | 47/198 [03:13<09:44, 3.87s/it]\n",
|
||
|
" 24%|##4 | 48/198 [03:16<08:55, 3.57s/it]\n",
|
||
|
" 25%|##4 | 49/198 [03:21<09:54, 3.99s/it]\n",
|
||
|
" 25%|##5 | 50/198 [03:28<12:26, 5.04s/it]\n",
|
||
|
" 26%|##5 | 51/198 [03:32<11:09, 4.55s/it]\n",
|
||
|
" 26%|##6 | 52/198 [03:35<10:14, 4.21s/it]\n",
|
||
|
" 27%|##6 | 53/198 [03:39<09:42, 4.02s/it]\n",
|
||
|
" 27%|##7 | 54/198 [03:46<11:52, 4.95s/it]\n",
|
||
|
" 28%|##7 | 55/198 [03:49<10:34, 4.44s/it]\n",
|
||
|
" 28%|##8 | 56/198 [03:51<09:02, 3.82s/it]\n",
|
||
|
" 29%|##8 | 57/198 [03:56<09:16, 3.95s/it]\n",
|
||
|
" 29%|##9 | 58/198 [03:59<08:56, 3.83s/it]\n",
|
||
|
" 30%|##9 | 59/198 [04:02<08:02, 3.47s/it]\n",
|
||
|
" 30%|### | 60/198 [04:05<07:40, 3.34s/it]\n",
|
||
|
" 31%|### | 61/198 [04:12<10:15, 4.49s/it]\n",
|
||
|
" 31%|###1 | 62/198 [04:14<08:45, 3.86s/it]\n",
|
||
|
" 32%|###1 | 63/198 [04:19<08:55, 3.97s/it]\n",
|
||
|
" 32%|###2 | 64/198 [04:23<09:05, 4.07s/it]\n",
|
||
|
" 33%|###2 | 65/198 [04:27<09:05, 4.10s/it]\n",
|
||
|
" 33%|###3 | 66/198 [04:31<09:04, 4.12s/it]\n",
|
||
|
" 34%|###3 | 67/198 [04:34<08:15, 3.79s/it]\n",
|
||
|
" 34%|###4 | 68/198 [04:37<07:34, 3.50s/it]\n",
|
||
|
" 35%|###4 | 69/198 [04:44<09:48, 4.56s/it]\n",
|
||
|
" 35%|###5 | 70/198 [04:47<08:53, 4.17s/it]\n",
|
||
|
" 36%|###5 | 71/198 [04:52<08:49, 4.17s/it]\n",
|
||
|
" 36%|###6 | 72/198 [04:56<08:46, 4.18s/it]\n",
|
||
|
" 37%|###6 | 73/198 [04:59<08:01, 3.85s/it]\n",
|
||
|
" 37%|###7 | 74/198 [05:02<07:26, 3.60s/it]\n",
|
||
|
" 38%|###7 | 75/198 [05:08<08:49, 4.31s/it]\n",
|
||
|
" 38%|###8 | 76/198 [05:12<08:36, 4.23s/it]\n",
|
||
|
" 39%|###8 | 77/198 [05:16<08:33, 4.24s/it]\n",
|
||
|
" 39%|###9 | 78/198 [05:20<08:29, 4.25s/it]\n",
|
||
|
" 40%|###9 | 79/198 [05:23<07:37, 3.84s/it]\n",
|
||
|
" 40%|#### | 80/198 [05:26<06:55, 3.52s/it]\n",
|
||
|
" 41%|#### | 81/198 [05:30<07:07, 3.66s/it]\n",
|
||
|
" 41%|####1 | 82/198 [05:33<06:32, 3.39s/it]\n",
|
||
|
" 42%|####1 | 83/198 [05:37<06:40, 3.48s/it]\n",
|
||
|
" 42%|####2 | 84/198 [05:41<07:02, 3.71s/it]\n",
|
||
|
" 43%|####2 | 85/198 [05:44<06:27, 3.43s/it]\n",
|
||
|
" 43%|####3 | 86/198 [05:48<06:54, 3.70s/it]\n",
|
||
|
" 44%|####3 | 87/198 [05:52<07:09, 3.87s/it]\n",
|
||
|
" 44%|####4 | 88/198 [05:55<06:29, 3.54s/it]\n",
|
||
|
" 45%|####4 | 89/198 [06:01<07:48, 4.29s/it]\n",
|
||
|
" 45%|####5 | 90/198 [06:04<06:50, 3.80s/it]\n",
|
||
|
" 46%|####5 | 91/198 [06:06<06:05, 3.42s/it]\n",
|
||
|
" 46%|####6 | 92/198 [06:09<05:42, 3.23s/it]\n",
|
||
|
" 47%|####6 | 93/198 [06:13<05:59, 3.42s/it]\n",
|
||
|
" 47%|####7 | 94/198 [06:16<05:45, 3.33s/it]\n",
|
||
|
" 48%|####7 | 95/198 [06:18<05:16, 3.07s/it]\n",
|
||
|
" 48%|####8 | 96/198 [06:22<05:13, 3.07s/it]\n",
|
||
|
" 49%|####8 | 97/198 [06:25<05:12, 3.09s/it]\n",
|
||
|
" 49%|####9 | 98/198 [06:28<05:03, 3.03s/it]\n",
|
||
|
" 50%|##### | 99/198 [06:31<05:00, 3.03s/it]\n",
|
||
|
" 51%|##### | 100/198 [06:33<04:53, 3.00s/it]\n",
|
||
|
" 51%|#####1 | 101/198 [06:36<04:50, 2.99s/it]\n",
|
||
|
" 52%|#####1 | 102/198 [06:41<05:21, 3.35s/it]\n",
|
||
|
" 52%|#####2 | 103/198 [06:45<05:39, 3.57s/it]\n",
|
||
|
" 53%|#####2 | 104/198 [06:48<05:28, 3.49s/it]\n",
|
||
|
" 53%|#####3 | 105/198 [06:54<06:28, 4.18s/it]\n",
|
||
|
" 54%|#####3 | 106/198 [06:56<05:42, 3.72s/it]\n",
|
||
|
" 54%|#####4 | 107/198 [07:00<05:21, 3.53s/it]\n",
|
||
|
" 55%|#####4 | 108/198 [07:02<04:57, 3.30s/it]\n",
|
||
|
" 55%|#####5 | 109/198 [07:05<04:32, 3.06s/it]\n",
|
||
|
" 56%|#####5 | 110/198 [07:08<04:23, 2.99s/it]\n",
|
||
|
" 56%|#####6 | 111/198 [07:10<04:10, 2.88s/it]\n",
|
||
|
" 57%|#####6 | 112/198 [07:13<04:00, 2.80s/it]\n",
|
||
|
" 57%|#####7 | 113/198 [07:16<04:15, 3.01s/it]\n",
|
||
|
" 58%|#####7 | 114/198 [07:20<04:23, 3.13s/it]\n",
|
||
|
" 58%|#####8 | 115/198 [07:23<04:26, 3.21s/it]\n",
|
||
|
" 59%|#####8 | 116/198 [07:26<04:18, 3.15s/it]\n",
|
||
|
" 59%|#####9 | 117/198 [07:30<04:21, 3.22s/it]\n",
|
||
|
" 60%|#####9 | 118/198 [07:37<05:52, 4.41s/it]\n",
|
||
|
" 60%|###### | 119/198 [07:42<06:16, 4.76s/it]\n",
|
||
|
" 61%|###### | 120/198 [07:47<05:58, 4.60s/it]\n",
|
||
|
" 61%|######1 | 121/198 [07:49<05:07, 4.00s/it]\n",
|
||
|
" 62%|######1 | 122/198 [07:52<04:34, 3.61s/it]\n",
|
||
|
" 62%|######2 | 123/198 [07:55<04:14, 3.40s/it]\n",
|
||
|
" 63%|######2 | 124/198 [07:57<03:55, 3.19s/it]\n",
|
||
|
" 63%|######3 | 125/198 [08:02<04:13, 3.47s/it]\n",
|
||
|
" 64%|######3 | 126/198 [08:05<04:17, 3.57s/it]\n",
|
||
|
" 64%|######4 | 127/198 [08:10<04:27, 3.77s/it]\n",
|
||
|
" 65%|######4 | 128/198 [08:12<04:02, 3.47s/it]\n",
|
||
|
" 65%|######5 | 129/198 [08:17<04:24, 3.84s/it]\n",
|
||
|
" 66%|######5 | 130/198 [08:21<04:28, 3.95s/it]\n",
|
||
|
" 66%|######6 | 131/198 [08:24<03:52, 3.47s/it]\n",
|
||
|
" 67%|######6 | 132/198 [08:27<03:40, 3.34s/it]\n",
|
||
|
" 67%|######7 | 133/198 [08:31<03:58, 3.66s/it]\n",
|
||
|
" 68%|######7 | 134/198 [08:36<04:11, 3.93s/it]\n",
|
||
|
" 68%|######8 | 135/198 [08:38<03:45, 3.58s/it]\n",
|
||
|
" 69%|######8 | 136/198 [08:41<03:26, 3.32s/it]\n",
|
||
|
" 69%|######9 | 137/198 [08:45<03:32, 3.49s/it]\n",
|
||
|
" 70%|######9 | 138/198 [08:49<03:43, 3.72s/it]\n",
|
||
|
" 70%|####### | 139/198 [08:53<03:37, 3.68s/it]\n",
|
||
|
" 71%|####### | 140/198 [08:57<03:38, 3.76s/it]\n",
|
||
|
" 71%|#######1 | 141/198 [09:00<03:18, 3.49s/it]\n",
|
||
|
" 72%|#######1 | 142/198 [09:03<03:07, 3.34s/it]\n",
|
||
|
" 72%|#######2 | 143/198 [09:07<03:17, 3.59s/it]\n",
|
||
|
" 73%|#######2 | 144/198 [09:10<03:03, 3.41s/it]\n",
|
||
|
" 73%|#######3 | 145/198 [09:13<02:58, 3.37s/it]\n",
|
||
|
" 74%|#######3 | 146/198 [09:17<03:07, 3.60s/it]\n",
|
||
|
" 74%|#######4 | 147/198 [09:21<03:01, 3.56s/it]\n",
|
||
|
" 75%|#######4 | 148/198 [09:25<03:13, 3.88s/it]\n",
|
||
|
" 75%|#######5 | 149/198 [09:29<03:02, 3.72s/it]\n",
|
||
|
" 76%|#######5 | 150/198 [09:33<03:05, 3.86s/it]\n",
|
||
|
" 76%|#######6 | 151/198 [09:36<02:51, 3.65s/it]\n",
|
||
|
" 77%|#######6 | 152/198 [09:40<02:51, 3.73s/it]\n",
|
||
|
" 77%|#######7 | 153/198 [09:43<02:40, 3.56s/it]\n",
|
||
|
" 78%|#######7 | 154/198 [09:46<02:27, 3.35s/it]\n",
|
||
|
" 78%|#######8 | 155/198 [09:50<02:31, 3.51s/it]\n",
|
||
|
" 79%|#######8 | 156/198 [09:53<02:19, 3.33s/it]\n",
|
||
|
" 79%|#######9 | 157/198 [09:56<02:12, 3.24s/it]\n",
|
||
|
" 80%|#######9 | 158/198 [09:58<02:02, 3.05s/it]\n",
|
||
|
" 80%|######## | 159/198 [10:01<01:52, 2.89s/it]\n",
|
||
|
" 81%|######## | 160/198 [10:04<01:46, 2.81s/it]\n",
|
||
|
" 81%|########1 | 161/198 [10:08<01:59, 3.23s/it]\n",
|
||
|
" 82%|########1 | 162/198 [10:11<02:00, 3.36s/it]\n",
|
||
|
" 82%|########2 | 163/198 [10:15<01:56, 3.32s/it]\n",
|
||
|
" 83%|########2 | 164/198 [10:19<02:01, 3.58s/it]\n",
|
||
|
" 83%|########3 | 165/198 [10:23<01:59, 3.63s/it]\n",
|
||
|
" 84%|########3 | 166/198 [10:27<02:00, 3.78s/it]\n",
|
||
|
" 84%|########4 | 167/198 [10:31<01:58, 3.83s/it]\n",
|
||
|
" 85%|########4 | 168/198 [10:38<02:23, 4.79s/it]\n",
|
||
|
" 85%|########5 | 169/198 [10:41<02:05, 4.33s/it]\n",
|
||
|
" 86%|########5 | 170/198 [10:43<01:44, 3.74s/it]\n",
|
||
|
" 86%|########6 | 171/198 [10:46<01:31, 3.40s/it]\n",
|
||
|
" 87%|########6 | 172/198 [10:50<01:35, 3.66s/it]\n",
|
||
|
" 87%|########7 | 173/198 [10:54<01:35, 3.81s/it]\n",
|
||
|
" 88%|########7 | 174/198 [10:59<01:36, 4.00s/it]\n",
|
||
|
" 88%|########8 | 175/198 [11:02<01:24, 3.68s/it]\n",
|
||
|
" 89%|########8 | 176/198 [11:06<01:26, 3.94s/it]\n",
|
||
|
" 89%|########9 | 177/198 [11:10<01:21, 3.89s/it]\n",
|
||
|
" 90%|########9 | 178/198 [11:14<01:16, 3.85s/it]\n",
|
||
|
" 90%|######### | 179/198 [11:17<01:07, 3.56s/it]\n",
|
||
|
" 91%|######### | 180/198 [11:20<01:00, 3.34s/it]\n",
|
||
|
" 91%|#########1| 181/198 [11:22<00:54, 3.18s/it]\n",
|
||
|
" 92%|#########1| 182/198 [11:27<00:55, 3.49s/it]\n",
|
||
|
" 92%|#########2| 183/198 [11:30<00:50, 3.36s/it]\n",
|
||
|
" 93%|#########2| 184/198 [11:34<00:50, 3.64s/it]\n",
|
||
|
" 93%|#########3| 185/198 [11:39<00:53, 4.08s/it]\n",
|
||
|
" 94%|#########3| 186/198 [11:42<00:43, 3.66s/it]\n",
|
||
|
" 94%|#########4| 187/198 [11:46<00:41, 3.80s/it]\n",
|
||
|
" 95%|#########4| 188/198 [11:50<00:38, 3.84s/it]\n",
|
||
|
" 95%|#########5| 189/198 [11:52<00:31, 3.47s/it]\n",
|
||
|
" 96%|#########5| 190/198 [11:55<00:26, 3.34s/it]\n",
|
||
|
" 96%|#########6| 191/198 [11:59<00:24, 3.52s/it]\n",
|
||
|
" 97%|#########6| 192/198 [12:02<00:19, 3.22s/it]\n",
|
||
|
" 97%|#########7| 193/198 [12:05<00:16, 3.31s/it]\n",
|
||
|
" 98%|#########7| 194/198 [12:10<00:15, 3.81s/it]\n",
|
||
|
" 98%|#########8| 195/198 [12:17<00:13, 4.65s/it]\n",
|
||
|
" 99%|#########8| 196/198 [12:20<00:08, 4.17s/it]\n",
|
||
|
" 99%|#########9| 197/198 [12:23<00:03, 3.68s/it]\n",
|
||
|
"100%|##########| 198/198 [12:25<00:00, 3.28s/it]02/16/2022 00:26:49 - INFO - __main__ - Epoch 0: {'accuracy': 0.884}\n",
|
||
|
"02/16/2022 00:27:16 - INFO - __main__ - Test-set evaluation: {'accuracy': 0.864}\n",
|
||
|
"Configuration saved in out/tweet/gpt2\\config.json\n",
|
||
|
"Model weights saved in out/tweet/gpt2\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/gpt2\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/gpt2\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 198/198 [13:25<00:00, 4.07s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path gpt2 \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 24 \\\n",
|
||
|
" --per_device_eval_batch_size 24 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/gpt2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# GPT2 version 2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 00:27:21 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:27:22 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/16/2022 00:27:22 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 176.25it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\684fe667923972fb57f6b4dcb61a3c92763ad89882f3da5da9866baf14f2d60f.c7ed1f96aac49e745788faa77ba0a26a392643a50bb388b9c04ff469e555241f\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\c0c761a63004025aeadd530c4c27b860ec4ecbe8a00531233de21d865a402598.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:27:28 - INFO - __main__ - Return hidden states from model: False\n",
|
||
|
"02/16/2022 00:27:28 - INFO - __main__ - Using implementation from: AutoModelForSequenceClassification\n",
|
||
|
"loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n",
|
||
|
"All model checkpoint weights were used when initializing GPT2ForSequenceClassification.\n",
|
||
|
"\n",
|
||
|
"Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"02/16/2022 00:27:29 - INFO - __main__ - Freezing model weights\n",
|
||
|
"Using pad_token, but it is not set yet.\n",
|
||
|
"02/16/2022 00:27:29 - INFO - __main__ - Set PAD token to EOS: <|endoftext|>\n",
|
||
|
"02/16/2022 00:27:29 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-ba0dca0006a47e01.arrow\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 71.63ba/s]\n",
|
||
|
"02/16/2022 00:27:29 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-d41f6257e87d100c.arrow\n",
|
||
|
"02/16/2022 00:27:29 - INFO - __main__ - Sample 826 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [22940, 126, 222, 126, 250, 732, 262, 661, 22940, 126, 222, 126, 251, 6198, 4001, 6184, 95, 126, 222, 126, 250, 732, 262, 2330, 11, 1956, 19216, 10835, 13, 22940, 126, 222, 126, 251, 220, 220, 220, 220, 220, 6184, 95, 126, 222, 126, 99, 1303, 5304, 259, 19, 10879, 22940, 126, 222, 126, 99, 220], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:27:29 - INFO - __main__ - Sample 521 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [31, 7220, 8425, 31582, 416, 2488, 7220, 287, 269, 30520, 13, 884, 23374, 986, 220, 220], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:27:29 - INFO - __main__ - Sample 2806 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [4623, 68, 4964, 2168, 352, 286, 1303, 1169, 43764, 523, 355, 284, 3190, 3368, 4346, 13, 220, 220], 'labels': 0}.\n",
|
||
|
"02/16/2022 00:27:30 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/16/2022 00:27:30 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/16/2022 00:27:30 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/16/2022 00:27:30 - INFO - __main__ - Instantaneous batch size per device = 24\n",
|
||
|
"02/16/2022 00:27:30 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 24\n",
|
||
|
"02/16/2022 00:27:30 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/16/2022 00:27:30 - INFO - __main__ - Total optimization steps = 198\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/198 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/198 [00:01<05:14, 1.59s/it]\n",
|
||
|
" 1%|1 | 2/198 [00:02<04:21, 1.33s/it]\n",
|
||
|
" 2%|1 | 3/198 [00:03<03:49, 1.18s/it]\n",
|
||
|
" 2%|2 | 4/198 [00:05<04:26, 1.38s/it]\n",
|
||
|
" 3%|2 | 5/198 [00:07<05:00, 1.56s/it]\n",
|
||
|
" 3%|3 | 6/198 [00:08<04:27, 1.39s/it]\n",
|
||
|
" 4%|3 | 7/198 [00:10<04:50, 1.52s/it]\n",
|
||
|
" 4%|4 | 8/198 [00:11<04:25, 1.40s/it]\n",
|
||
|
" 5%|4 | 9/198 [00:12<04:34, 1.45s/it]\n",
|
||
|
" 5%|5 | 10/198 [00:14<04:35, 1.46s/it]\n",
|
||
|
" 6%|5 | 11/198 [00:15<04:21, 1.40s/it]\n",
|
||
|
" 6%|6 | 12/198 [00:16<03:55, 1.27s/it]\n",
|
||
|
" 7%|6 | 13/198 [00:18<04:08, 1.34s/it]\n",
|
||
|
" 7%|7 | 14/198 [00:20<05:34, 1.82s/it]\n",
|
||
|
" 8%|7 | 15/198 [00:22<05:06, 1.67s/it]\n",
|
||
|
" 8%|8 | 16/198 [00:23<04:25, 1.46s/it]\n",
|
||
|
" 9%|8 | 17/198 [00:24<04:00, 1.33s/it]\n",
|
||
|
" 9%|9 | 18/198 [00:27<05:22, 1.79s/it]\n",
|
||
|
" 10%|9 | 19/198 [00:28<05:07, 1.72s/it]\n",
|
||
|
" 10%|# | 20/198 [00:29<04:24, 1.49s/it]\n",
|
||
|
" 11%|# | 21/198 [00:32<05:15, 1.78s/it]\n",
|
||
|
" 11%|#1 | 22/198 [00:33<05:00, 1.71s/it]\n",
|
||
|
" 12%|#1 | 23/198 [00:34<04:19, 1.48s/it]\n",
|
||
|
" 12%|#2 | 24/198 [00:35<03:58, 1.37s/it]\n",
|
||
|
" 13%|#2 | 25/198 [00:36<03:40, 1.27s/it]\n",
|
||
|
" 13%|#3 | 26/198 [00:38<03:52, 1.35s/it]\n",
|
||
|
" 14%|#3 | 27/198 [00:39<03:49, 1.34s/it]\n",
|
||
|
" 14%|#4 | 28/198 [00:40<03:28, 1.22s/it]\n",
|
||
|
" 15%|#4 | 29/198 [00:42<03:41, 1.31s/it]\n",
|
||
|
" 15%|#5 | 30/198 [00:42<03:18, 1.18s/it]\n",
|
||
|
" 16%|#5 | 31/198 [00:44<03:43, 1.34s/it]\n",
|
||
|
" 16%|#6 | 32/198 [00:46<03:45, 1.36s/it]\n",
|
||
|
" 17%|#6 | 33/198 [00:47<03:49, 1.39s/it]\n",
|
||
|
" 17%|#7 | 34/198 [00:48<03:34, 1.31s/it]\n",
|
||
|
" 18%|#7 | 35/198 [00:49<03:23, 1.25s/it]\n",
|
||
|
" 18%|#8 | 36/198 [00:50<03:11, 1.18s/it]\n",
|
||
|
" 19%|#8 | 37/198 [00:52<03:30, 1.31s/it]\n",
|
||
|
" 19%|#9 | 38/198 [00:53<03:12, 1.20s/it]\n",
|
||
|
" 20%|#9 | 39/198 [00:54<03:02, 1.15s/it]\n",
|
||
|
" 20%|## | 40/198 [00:56<03:29, 1.33s/it]\n",
|
||
|
" 21%|## | 41/198 [00:57<03:17, 1.26s/it]\n",
|
||
|
" 21%|##1 | 42/198 [00:58<03:29, 1.35s/it]\n",
|
||
|
" 22%|##1 | 43/198 [00:59<03:08, 1.22s/it]\n",
|
||
|
" 22%|##2 | 44/198 [01:00<03:03, 1.19s/it]\n",
|
||
|
" 23%|##2 | 45/198 [01:02<03:14, 1.27s/it]\n",
|
||
|
" 23%|##3 | 46/198 [01:03<03:26, 1.36s/it]\n",
|
||
|
" 24%|##3 | 47/198 [01:04<03:07, 1.24s/it]\n",
|
||
|
" 24%|##4 | 48/198 [01:06<03:18, 1.32s/it]\n",
|
||
|
" 25%|##4 | 49/198 [01:07<02:58, 1.19s/it]\n",
|
||
|
" 25%|##5 | 50/198 [01:08<02:46, 1.12s/it]\n",
|
||
|
" 26%|##5 | 51/198 [01:09<03:04, 1.26s/it]\n",
|
||
|
" 26%|##6 | 52/198 [01:11<03:16, 1.34s/it]\n",
|
||
|
" 27%|##6 | 53/198 [01:12<02:55, 1.21s/it]\n",
|
||
|
" 27%|##7 | 54/198 [01:13<03:07, 1.30s/it]\n",
|
||
|
" 28%|##7 | 55/198 [01:14<03:00, 1.26s/it]\n",
|
||
|
" 28%|##8 | 56/198 [01:15<02:40, 1.13s/it]\n",
|
||
|
" 29%|##8 | 57/198 [01:17<03:10, 1.35s/it]\n",
|
||
|
" 29%|##9 | 58/198 [01:18<03:02, 1.30s/it]\n",
|
||
|
" 30%|##9 | 59/198 [01:20<03:09, 1.37s/it]\n",
|
||
|
" 30%|### | 60/198 [01:21<02:45, 1.20s/it]\n",
|
||
|
" 31%|### | 61/198 [01:22<02:40, 1.17s/it]\n",
|
||
|
" 31%|###1 | 62/198 [01:23<02:41, 1.18s/it]\n",
|
||
|
" 32%|###1 | 63/198 [01:24<02:54, 1.29s/it]\n",
|
||
|
" 32%|###2 | 64/198 [01:26<02:48, 1.26s/it]\n",
|
||
|
" 33%|###2 | 65/198 [01:27<02:56, 1.33s/it]\n",
|
||
|
" 33%|###3 | 66/198 [01:29<03:03, 1.39s/it]\n",
|
||
|
" 34%|###3 | 67/198 [01:30<03:10, 1.45s/it]\n",
|
||
|
" 34%|###4 | 68/198 [01:33<03:44, 1.73s/it]\n",
|
||
|
" 35%|###4 | 69/198 [01:34<03:16, 1.52s/it]\n",
|
||
|
" 35%|###5 | 70/198 [01:35<03:16, 1.53s/it]\n",
|
||
|
" 36%|###5 | 71/198 [01:36<02:53, 1.37s/it]\n",
|
||
|
" 36%|###6 | 72/198 [01:38<03:00, 1.43s/it]\n",
|
||
|
" 37%|###6 | 73/198 [01:39<02:58, 1.43s/it]\n",
|
||
|
" 37%|###7 | 74/198 [01:41<02:59, 1.45s/it]\n",
|
||
|
" 38%|###7 | 75/198 [01:42<02:45, 1.34s/it]\n",
|
||
|
" 38%|###8 | 76/198 [01:43<02:35, 1.28s/it]\n",
|
||
|
" 39%|###8 | 77/198 [01:44<02:40, 1.33s/it]\n",
|
||
|
" 39%|###9 | 78/198 [01:46<02:32, 1.27s/it]\n",
|
||
|
" 40%|###9 | 79/198 [01:47<02:31, 1.27s/it]\n",
|
||
|
" 40%|#### | 80/198 [01:48<02:28, 1.26s/it]\n",
|
||
|
" 41%|#### | 81/198 [01:49<02:18, 1.19s/it]\n",
|
||
|
" 41%|####1 | 82/198 [01:52<03:16, 1.69s/it]\n",
|
||
|
" 42%|####1 | 83/198 [01:53<03:10, 1.65s/it]\n",
|
||
|
" 42%|####2 | 84/198 [01:55<02:49, 1.49s/it]\n",
|
||
|
" 43%|####2 | 85/198 [01:56<02:56, 1.56s/it]\n",
|
||
|
" 43%|####3 | 86/198 [01:57<02:29, 1.34s/it]\n",
|
||
|
" 44%|####3 | 87/198 [01:58<02:24, 1.30s/it]\n",
|
||
|
" 44%|####4 | 88/198 [01:59<02:14, 1.23s/it]\n",
|
||
|
" 45%|####4 | 89/198 [02:01<02:13, 1.22s/it]\n",
|
||
|
" 45%|####5 | 90/198 [02:02<02:14, 1.24s/it]\n",
|
||
|
" 46%|####5 | 91/198 [02:03<02:09, 1.21s/it]\n",
|
||
|
" 46%|####6 | 92/198 [02:04<01:59, 1.13s/it]\n",
|
||
|
" 47%|####6 | 93/198 [02:05<01:53, 1.08s/it]\n",
|
||
|
" 47%|####7 | 94/198 [02:06<01:53, 1.09s/it]\n",
|
||
|
" 48%|####7 | 95/198 [02:07<01:45, 1.02s/it]\n",
|
||
|
" 48%|####8 | 96/198 [02:08<01:59, 1.17s/it]\n",
|
||
|
" 49%|####8 | 97/198 [02:09<01:53, 1.12s/it]\n",
|
||
|
" 49%|####9 | 98/198 [02:11<02:14, 1.35s/it]\n",
|
||
|
" 50%|##### | 99/198 [02:13<02:13, 1.35s/it]\n",
|
||
|
" 51%|##### | 100/198 [02:15<02:51, 1.75s/it]\n",
|
||
|
" 51%|#####1 | 101/198 [02:18<03:02, 1.88s/it]\n",
|
||
|
" 52%|#####1 | 102/198 [02:18<02:33, 1.60s/it]\n",
|
||
|
" 52%|#####2 | 103/198 [02:19<02:09, 1.36s/it]\n",
|
||
|
" 53%|#####2 | 104/198 [02:20<01:59, 1.27s/it]\n",
|
||
|
" 53%|#####3 | 105/198 [02:22<02:07, 1.37s/it]\n",
|
||
|
" 54%|#####3 | 106/198 [02:23<02:07, 1.38s/it]\n",
|
||
|
" 54%|#####4 | 107/198 [02:25<02:06, 1.39s/it]\n",
|
||
|
" 55%|#####4 | 108/198 [02:26<02:09, 1.43s/it]\n",
|
||
|
" 55%|#####5 | 109/198 [02:29<02:47, 1.88s/it]\n",
|
||
|
" 56%|#####5 | 110/198 [02:30<02:28, 1.69s/it]\n",
|
||
|
" 56%|#####6 | 111/198 [02:31<02:07, 1.47s/it]\n",
|
||
|
" 57%|#####6 | 112/198 [02:33<02:06, 1.47s/it]\n",
|
||
|
" 57%|#####7 | 113/198 [02:34<01:51, 1.31s/it]\n",
|
||
|
" 58%|#####7 | 114/198 [02:35<01:56, 1.39s/it]\n",
|
||
|
" 58%|#####8 | 115/198 [02:36<01:46, 1.29s/it]\n",
|
||
|
" 59%|#####8 | 116/198 [02:38<01:46, 1.29s/it]\n",
|
||
|
" 59%|#####9 | 117/198 [02:39<01:39, 1.23s/it]\n",
|
||
|
" 60%|#####9 | 118/198 [02:40<01:39, 1.25s/it]\n",
|
||
|
" 60%|###### | 119/198 [02:41<01:30, 1.15s/it]\n",
|
||
|
" 61%|###### | 120/198 [02:42<01:26, 1.11s/it]\n",
|
||
|
" 61%|######1 | 121/198 [02:43<01:31, 1.19s/it]\n",
|
||
|
" 62%|######1 | 122/198 [02:45<01:27, 1.15s/it]\n",
|
||
|
" 62%|######2 | 123/198 [02:46<01:31, 1.22s/it]\n",
|
||
|
" 63%|######2 | 124/198 [02:47<01:29, 1.21s/it]\n",
|
||
|
" 63%|######3 | 125/198 [02:49<01:37, 1.33s/it]\n",
|
||
|
" 64%|######3 | 126/198 [02:50<01:30, 1.25s/it]\n",
|
||
|
" 64%|######4 | 127/198 [02:52<01:39, 1.41s/it]\n",
|
||
|
" 65%|######4 | 128/198 [02:53<01:39, 1.42s/it]\n",
|
||
|
" 65%|######5 | 129/198 [02:54<01:32, 1.34s/it]\n",
|
||
|
" 66%|######5 | 130/198 [02:55<01:25, 1.25s/it]\n",
|
||
|
" 66%|######6 | 131/198 [02:58<01:52, 1.68s/it]\n",
|
||
|
" 67%|######6 | 132/198 [02:59<01:37, 1.47s/it]\n",
|
||
|
" 67%|######7 | 133/198 [03:01<01:41, 1.56s/it]\n",
|
||
|
" 68%|######7 | 134/198 [03:02<01:36, 1.50s/it]\n",
|
||
|
" 68%|######8 | 135/198 [03:03<01:28, 1.41s/it]\n",
|
||
|
" 69%|######8 | 136/198 [03:05<01:30, 1.47s/it]\n",
|
||
|
" 69%|######9 | 137/198 [03:06<01:17, 1.26s/it]\n",
|
||
|
" 70%|######9 | 138/198 [03:07<01:15, 1.27s/it]\n",
|
||
|
" 70%|####### | 139/198 [03:08<01:06, 1.13s/it]\n",
|
||
|
" 71%|####### | 140/198 [03:09<01:10, 1.21s/it]\n",
|
||
|
" 71%|#######1 | 141/198 [03:11<01:28, 1.55s/it]\n",
|
||
|
" 72%|#######1 | 142/198 [03:13<01:31, 1.63s/it]\n",
|
||
|
" 72%|#######2 | 143/198 [03:15<01:26, 1.58s/it]\n",
|
||
|
" 73%|#######2 | 144/198 [03:15<01:12, 1.35s/it]\n",
|
||
|
" 73%|#######3 | 145/198 [03:16<01:03, 1.19s/it]\n",
|
||
|
" 74%|#######3 | 146/198 [03:17<00:59, 1.15s/it]\n",
|
||
|
" 74%|#######4 | 147/198 [03:18<00:56, 1.11s/it]\n",
|
||
|
" 75%|#######4 | 148/198 [03:19<00:53, 1.07s/it]\n",
|
||
|
" 75%|#######5 | 149/198 [03:21<00:59, 1.21s/it]\n",
|
||
|
" 76%|#######5 | 150/198 [03:22<00:54, 1.14s/it]\n",
|
||
|
" 76%|#######6 | 151/198 [03:23<00:50, 1.08s/it]\n",
|
||
|
" 77%|#######6 | 152/198 [03:25<01:00, 1.32s/it]\n",
|
||
|
" 77%|#######7 | 153/198 [03:26<01:02, 1.38s/it]\n",
|
||
|
" 78%|#######7 | 154/198 [03:27<00:55, 1.27s/it]\n",
|
||
|
" 78%|#######8 | 155/198 [03:29<00:55, 1.29s/it]\n",
|
||
|
" 79%|#######8 | 156/198 [03:30<00:56, 1.35s/it]\n",
|
||
|
" 79%|#######9 | 157/198 [03:31<00:49, 1.22s/it]\n",
|
||
|
" 80%|#######9 | 158/198 [03:33<00:59, 1.49s/it]\n",
|
||
|
" 80%|######## | 159/198 [03:34<00:52, 1.34s/it]\n",
|
||
|
" 81%|######## | 160/198 [03:35<00:51, 1.36s/it]\n",
|
||
|
" 81%|########1 | 161/198 [03:37<00:47, 1.27s/it]\n",
|
||
|
" 82%|########1 | 162/198 [03:37<00:41, 1.15s/it]\n",
|
||
|
" 82%|########2 | 163/198 [03:38<00:36, 1.06s/it]\n",
|
||
|
" 83%|########2 | 164/198 [03:40<00:40, 1.20s/it]\n",
|
||
|
" 83%|########3 | 165/198 [03:41<00:42, 1.30s/it]\n",
|
||
|
" 84%|########3 | 166/198 [03:44<00:54, 1.69s/it]\n",
|
||
|
" 84%|########4 | 167/198 [03:47<01:03, 2.06s/it]\n",
|
||
|
" 85%|########4 | 168/198 [03:48<00:54, 1.81s/it]\n",
|
||
|
" 85%|########5 | 169/198 [03:50<00:49, 1.70s/it]\n",
|
||
|
" 86%|########5 | 170/198 [03:51<00:41, 1.49s/it]\n",
|
||
|
" 86%|########6 | 171/198 [03:52<00:37, 1.37s/it]\n",
|
||
|
" 87%|########6 | 172/198 [03:52<00:30, 1.18s/it]\n",
|
||
|
" 87%|########7 | 173/198 [03:53<00:27, 1.09s/it]\n",
|
||
|
" 88%|########7 | 174/198 [03:54<00:24, 1.03s/it]\n",
|
||
|
" 88%|########8 | 175/198 [03:56<00:26, 1.15s/it]\n",
|
||
|
" 89%|########8 | 176/198 [03:58<00:36, 1.67s/it]\n",
|
||
|
" 89%|########9 | 177/198 [04:00<00:32, 1.56s/it]\n",
|
||
|
" 90%|########9 | 178/198 [04:01<00:27, 1.38s/it]\n",
|
||
|
" 90%|######### | 179/198 [04:02<00:23, 1.24s/it]\n",
|
||
|
" 91%|######### | 180/198 [04:03<00:21, 1.18s/it]\n",
|
||
|
" 91%|#########1| 181/198 [04:04<00:19, 1.12s/it]\n",
|
||
|
" 92%|#########1| 182/198 [04:05<00:20, 1.26s/it]\n",
|
||
|
" 92%|#########2| 183/198 [04:07<00:19, 1.28s/it]\n",
|
||
|
" 93%|#########2| 184/198 [04:09<00:21, 1.54s/it]\n",
|
||
|
" 93%|#########3| 185/198 [04:10<00:20, 1.54s/it]\n",
|
||
|
" 94%|#########3| 186/198 [04:11<00:16, 1.42s/it]\n",
|
||
|
" 94%|#########4| 187/198 [04:12<00:14, 1.31s/it]\n",
|
||
|
" 95%|#########4| 188/198 [04:14<00:12, 1.29s/it]\n",
|
||
|
" 95%|#########5| 189/198 [04:15<00:10, 1.15s/it]\n",
|
||
|
" 96%|#########5| 190/198 [04:16<00:09, 1.25s/it]\n",
|
||
|
" 96%|#########6| 191/198 [04:18<00:09, 1.34s/it]\n",
|
||
|
" 97%|#########6| 192/198 [04:18<00:06, 1.17s/it]\n",
|
||
|
" 97%|#########7| 193/198 [04:21<00:07, 1.51s/it]\n",
|
||
|
" 98%|#########7| 194/198 [04:22<00:05, 1.39s/it]\n",
|
||
|
" 98%|#########8| 195/198 [04:23<00:03, 1.31s/it]\n",
|
||
|
" 99%|#########8| 196/198 [04:24<00:02, 1.17s/it]\n",
|
||
|
" 99%|#########9| 197/198 [04:25<00:01, 1.10s/it]\n",
|
||
|
"100%|##########| 198/198 [04:26<00:00, 1.09s/it]02/16/2022 00:32:30 - INFO - __main__ - Epoch 0: {'accuracy': 0.846}\n",
|
||
|
"02/16/2022 00:32:57 - INFO - __main__ - Test-set evaluation: {'accuracy': 0.904}\n",
|
||
|
"Configuration saved in out/tweet/gpt2_version_2\\config.json\n",
|
||
|
"Model weights saved in out/tweet/gpt2_version_2\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/gpt2_version_2\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/gpt2_version_2\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 198/198 [05:27<00:00, 1.65s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path gpt2 \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 24 \\\n",
|
||
|
" --per_device_eval_batch_size 24 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --freeze_model \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/gpt2_version_2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# GPT2 version 3 "
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 00:33:00 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:33:00 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/16/2022 00:33:00 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1504.23it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\684fe667923972fb57f6b4dcb61a3c92763ad89882f3da5da9866baf14f2d60f.c7ed1f96aac49e745788faa77ba0a26a392643a50bb388b9c04ff469e555241f\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\c0c761a63004025aeadd530c4c27b860ec4ecbe8a00531233de21d865a402598.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:33:06 - INFO - __main__ - Return hidden states from model: False\n",
|
||
|
"02/16/2022 00:33:06 - INFO - __main__ - Using implementation from: GPT2ForSequenceClassificationCustom\n",
|
||
|
"loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n",
|
||
|
"All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
|
||
|
"\n",
|
||
|
"Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_hidden.bias', 'score.dense_1_input.bias', 'score.out_proj.weight', 'score.dense_2.bias', 'score.dense_1_hidden.weight', 'score.dense_2.weight', 'score.dense_1_input.weight']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"02/16/2022 00:33:08 - INFO - __main__ - Freezing model weights\n",
|
||
|
"Using pad_token, but it is not set yet.\n",
|
||
|
"02/16/2022 00:33:08 - INFO - __main__ - Set PAD token to EOS: <|endoftext|>\n",
|
||
|
"02/16/2022 00:33:08 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-f4385b00908c069e.arrow\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 32.35ba/s]\n",
|
||
|
"02/16/2022 00:33:08 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-c36412d695a9c6f1.arrow\n",
|
||
|
"02/16/2022 00:33:08 - INFO - __main__ - Sample 1528 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [258, 338, 281, 555, 2382, 7490, 764, 1303, 22584, 220], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:33:08 - INFO - __main__ - Sample 113 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [31, 7220, 43646, 2148, 20577, 1303, 37098, 13948, 1337, 329, 1303, 11085, 77, 602, 25, 5387, 16155, 220, 1303, 17089, 6894, 5171, 4763, 220], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:33:08 - INFO - __main__ - Sample 485 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [31, 7220, 2488, 7220, 883, 6886, 284, 220, 1303, 12480, 4604, 594, 1303, 5183, 445, 1303, 259, 31012, 1303, 42570, 6098, 999, 1303, 721, 16207, 481, 1309, 1303, 40954, 760, 674, 8666, 1303, 5539], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:33:09 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/16/2022 00:33:09 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/16/2022 00:33:09 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/16/2022 00:33:09 - INFO - __main__ - Instantaneous batch size per device = 24\n",
|
||
|
"02/16/2022 00:33:09 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 24\n",
|
||
|
"02/16/2022 00:33:09 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/16/2022 00:33:09 - INFO - __main__ - Total optimization steps = 198\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/198 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/198 [00:01<03:23, 1.03s/it]\n",
|
||
|
" 1%|1 | 2/198 [00:02<03:32, 1.08s/it]\n",
|
||
|
" 2%|1 | 3/198 [00:04<04:40, 1.44s/it]\n",
|
||
|
" 2%|2 | 4/198 [00:05<04:33, 1.41s/it]\n",
|
||
|
" 3%|2 | 5/198 [00:06<04:33, 1.42s/it]\n",
|
||
|
" 3%|3 | 6/198 [00:08<04:18, 1.35s/it]\n",
|
||
|
" 4%|3 | 7/198 [00:09<04:03, 1.28s/it]\n",
|
||
|
" 4%|4 | 8/198 [00:10<03:43, 1.17s/it]\n",
|
||
|
" 5%|4 | 9/198 [00:12<05:08, 1.63s/it]\n",
|
||
|
" 5%|5 | 10/198 [00:14<05:01, 1.60s/it]\n",
|
||
|
" 6%|5 | 11/198 [00:15<04:29, 1.44s/it]\n",
|
||
|
" 6%|6 | 12/198 [00:17<04:39, 1.50s/it]\n",
|
||
|
" 7%|6 | 13/198 [00:18<04:23, 1.42s/it]\n",
|
||
|
" 7%|7 | 14/198 [00:19<04:04, 1.33s/it]\n",
|
||
|
" 8%|7 | 15/198 [00:22<05:33, 1.82s/it]\n",
|
||
|
" 8%|8 | 16/198 [00:23<05:13, 1.72s/it]\n",
|
||
|
" 9%|8 | 17/198 [00:25<05:16, 1.75s/it]\n",
|
||
|
" 9%|9 | 18/198 [00:26<04:28, 1.49s/it]\n",
|
||
|
" 10%|9 | 19/198 [00:28<04:34, 1.53s/it]\n",
|
||
|
" 10%|# | 20/198 [00:30<04:55, 1.66s/it]\n",
|
||
|
" 11%|# | 21/198 [00:31<04:30, 1.53s/it]\n",
|
||
|
" 11%|#1 | 22/198 [00:32<03:57, 1.35s/it]\n",
|
||
|
" 12%|#1 | 23/198 [00:33<04:04, 1.39s/it]\n",
|
||
|
" 12%|#2 | 24/198 [00:36<04:50, 1.67s/it]\n",
|
||
|
" 13%|#2 | 25/198 [00:37<04:31, 1.57s/it]\n",
|
||
|
" 13%|#3 | 26/198 [00:38<04:05, 1.43s/it]\n",
|
||
|
" 14%|#3 | 27/198 [00:39<03:49, 1.34s/it]\n",
|
||
|
" 14%|#4 | 28/198 [00:40<03:37, 1.28s/it]\n",
|
||
|
" 15%|#4 | 29/198 [00:42<03:48, 1.35s/it]\n",
|
||
|
" 15%|#5 | 30/198 [00:44<04:06, 1.47s/it]\n",
|
||
|
" 16%|#5 | 31/198 [00:45<03:43, 1.34s/it]\n",
|
||
|
" 16%|#6 | 32/198 [00:46<03:35, 1.30s/it]\n",
|
||
|
" 17%|#6 | 33/198 [00:47<03:27, 1.26s/it]\n",
|
||
|
" 17%|#7 | 34/198 [00:50<04:49, 1.76s/it]\n",
|
||
|
" 18%|#7 | 35/198 [00:51<04:15, 1.57s/it]\n",
|
||
|
" 18%|#8 | 36/198 [00:53<04:21, 1.61s/it]\n",
|
||
|
" 19%|#8 | 37/198 [00:54<04:23, 1.64s/it]\n",
|
||
|
" 19%|#9 | 38/198 [00:56<04:25, 1.66s/it]\n",
|
||
|
" 20%|#9 | 39/198 [00:58<04:25, 1.67s/it]\n",
|
||
|
" 20%|## | 40/198 [00:59<04:03, 1.54s/it]\n",
|
||
|
" 21%|## | 41/198 [01:00<03:52, 1.48s/it]\n",
|
||
|
" 21%|##1 | 42/198 [01:01<03:32, 1.36s/it]\n",
|
||
|
" 22%|##1 | 43/198 [01:03<03:29, 1.35s/it]\n",
|
||
|
" 22%|##2 | 44/198 [01:04<03:16, 1.28s/it]\n",
|
||
|
" 23%|##2 | 45/198 [01:06<03:31, 1.38s/it]\n",
|
||
|
" 23%|##3 | 46/198 [01:07<03:37, 1.43s/it]\n",
|
||
|
" 24%|##3 | 47/198 [01:08<03:17, 1.31s/it]\n",
|
||
|
" 24%|##4 | 48/198 [01:09<02:57, 1.19s/it]\n",
|
||
|
" 25%|##4 | 49/198 [01:10<02:52, 1.16s/it]\n",
|
||
|
" 25%|##5 | 50/198 [01:12<03:23, 1.38s/it]\n",
|
||
|
" 26%|##5 | 51/198 [01:13<03:16, 1.34s/it]\n",
|
||
|
" 26%|##6 | 52/198 [01:14<03:09, 1.30s/it]\n",
|
||
|
" 27%|##6 | 53/198 [01:16<03:07, 1.29s/it]\n",
|
||
|
" 27%|##7 | 54/198 [01:17<03:08, 1.31s/it]\n",
|
||
|
" 28%|##7 | 55/198 [01:19<03:22, 1.42s/it]\n",
|
||
|
" 28%|##8 | 56/198 [01:21<04:06, 1.74s/it]\n",
|
||
|
" 29%|##8 | 57/198 [01:22<03:35, 1.53s/it]\n",
|
||
|
" 29%|##9 | 58/198 [01:23<03:16, 1.40s/it]\n",
|
||
|
" 30%|##9 | 59/198 [01:26<04:01, 1.74s/it]\n",
|
||
|
" 30%|### | 60/198 [01:28<03:57, 1.72s/it]\n",
|
||
|
" 31%|### | 61/198 [01:29<03:49, 1.67s/it]\n",
|
||
|
" 31%|###1 | 62/198 [01:31<04:04, 1.80s/it]\n",
|
||
|
" 32%|###1 | 63/198 [01:33<03:44, 1.67s/it]\n",
|
||
|
" 32%|###2 | 64/198 [01:34<03:18, 1.48s/it]\n",
|
||
|
" 33%|###2 | 65/198 [01:35<02:57, 1.33s/it]\n",
|
||
|
" 33%|###3 | 66/198 [01:36<02:44, 1.25s/it]\n",
|
||
|
" 34%|###3 | 67/198 [01:37<02:59, 1.37s/it]\n",
|
||
|
" 34%|###4 | 68/198 [01:40<03:45, 1.73s/it]\n",
|
||
|
" 35%|###4 | 69/198 [01:41<03:29, 1.63s/it]\n",
|
||
|
" 35%|###5 | 70/198 [01:42<03:10, 1.49s/it]\n",
|
||
|
" 36%|###5 | 71/198 [01:44<03:17, 1.55s/it]\n",
|
||
|
" 36%|###6 | 72/198 [01:46<03:32, 1.69s/it]\n",
|
||
|
" 37%|###6 | 73/198 [01:47<03:06, 1.49s/it]\n",
|
||
|
" 37%|###7 | 74/198 [01:48<02:50, 1.38s/it]\n",
|
||
|
" 38%|###7 | 75/198 [01:49<02:38, 1.29s/it]\n",
|
||
|
" 38%|###8 | 76/198 [01:51<02:50, 1.40s/it]\n",
|
||
|
" 39%|###8 | 77/198 [01:52<02:50, 1.41s/it]\n",
|
||
|
" 39%|###9 | 78/198 [01:54<02:41, 1.34s/it]\n",
|
||
|
" 40%|###9 | 79/198 [01:55<02:54, 1.46s/it]\n",
|
||
|
" 40%|#### | 80/198 [01:58<03:39, 1.86s/it]\n",
|
||
|
" 41%|#### | 81/198 [01:59<03:05, 1.59s/it]\n",
|
||
|
" 41%|####1 | 82/198 [02:00<02:42, 1.40s/it]\n",
|
||
|
" 42%|####1 | 83/198 [02:01<02:26, 1.28s/it]\n",
|
||
|
" 42%|####2 | 84/198 [02:02<02:23, 1.25s/it]\n",
|
||
|
" 43%|####2 | 85/198 [02:03<02:12, 1.17s/it]\n",
|
||
|
" 43%|####3 | 86/198 [02:05<02:14, 1.20s/it]\n",
|
||
|
" 44%|####3 | 87/198 [02:06<02:19, 1.26s/it]\n",
|
||
|
" 44%|####4 | 88/198 [02:07<02:21, 1.29s/it]\n",
|
||
|
" 45%|####4 | 89/198 [02:08<02:11, 1.20s/it]\n",
|
||
|
" 45%|####5 | 90/198 [02:11<02:49, 1.57s/it]\n",
|
||
|
" 46%|####5 | 91/198 [02:14<03:32, 1.98s/it]\n",
|
||
|
" 46%|####6 | 92/198 [02:15<03:18, 1.87s/it]\n",
|
||
|
" 47%|####6 | 93/198 [02:16<02:48, 1.61s/it]\n",
|
||
|
" 47%|####7 | 94/198 [02:17<02:32, 1.47s/it]\n",
|
||
|
" 48%|####7 | 95/198 [02:19<02:41, 1.57s/it]\n",
|
||
|
" 48%|####8 | 96/198 [02:21<02:43, 1.60s/it]\n",
|
||
|
" 49%|####8 | 97/198 [02:22<02:27, 1.46s/it]\n",
|
||
|
" 49%|####9 | 98/198 [02:24<02:43, 1.63s/it]\n",
|
||
|
" 50%|##### | 99/198 [02:26<02:42, 1.64s/it]\n",
|
||
|
" 51%|##### | 100/198 [02:27<02:42, 1.66s/it]\n",
|
||
|
" 51%|#####1 | 101/198 [02:29<02:29, 1.54s/it]\n",
|
||
|
" 52%|#####1 | 102/198 [02:32<03:13, 2.01s/it]\n",
|
||
|
" 52%|#####2 | 103/198 [02:33<02:45, 1.74s/it]\n",
|
||
|
" 53%|#####2 | 104/198 [02:34<02:25, 1.55s/it]\n",
|
||
|
" 53%|#####3 | 105/198 [02:35<02:19, 1.50s/it]\n",
|
||
|
" 54%|#####3 | 106/198 [02:36<02:06, 1.37s/it]\n",
|
||
|
" 54%|#####4 | 107/198 [02:38<01:57, 1.29s/it]\n",
|
||
|
" 55%|#####4 | 108/198 [02:40<02:15, 1.50s/it]\n",
|
||
|
" 55%|#####5 | 109/198 [02:41<02:18, 1.56s/it]\n",
|
||
|
" 56%|#####5 | 110/198 [02:43<02:13, 1.52s/it]\n",
|
||
|
" 56%|#####6 | 111/198 [02:44<02:09, 1.48s/it]\n",
|
||
|
" 57%|#####6 | 112/198 [02:46<02:09, 1.51s/it]\n",
|
||
|
" 57%|#####7 | 113/198 [02:47<02:03, 1.45s/it]\n",
|
||
|
" 58%|#####7 | 114/198 [02:49<02:12, 1.58s/it]\n",
|
||
|
" 58%|#####8 | 115/198 [02:50<02:01, 1.46s/it]\n",
|
||
|
" 59%|#####8 | 116/198 [02:53<02:39, 1.95s/it]\n",
|
||
|
" 59%|#####9 | 117/198 [02:54<02:21, 1.74s/it]\n",
|
||
|
" 60%|#####9 | 118/198 [02:56<02:05, 1.57s/it]\n",
|
||
|
" 60%|###### | 119/198 [02:57<02:04, 1.57s/it]\n",
|
||
|
" 61%|###### | 120/198 [02:58<01:47, 1.38s/it]\n",
|
||
|
" 61%|######1 | 121/198 [03:00<01:55, 1.50s/it]\n",
|
||
|
" 62%|######1 | 122/198 [03:01<01:52, 1.48s/it]\n",
|
||
|
" 62%|######2 | 123/198 [03:03<01:54, 1.53s/it]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" 63%|######2 | 124/198 [03:04<01:46, 1.44s/it]\n",
|
||
|
" 63%|######3 | 125/198 [03:06<01:50, 1.51s/it]\n",
|
||
|
" 64%|######3 | 126/198 [03:07<01:40, 1.39s/it]\n",
|
||
|
" 64%|######4 | 127/198 [03:08<01:28, 1.24s/it]\n",
|
||
|
" 65%|######4 | 128/198 [03:09<01:23, 1.19s/it]\n",
|
||
|
" 65%|######5 | 129/198 [03:10<01:24, 1.22s/it]\n",
|
||
|
" 66%|######5 | 130/198 [03:11<01:21, 1.20s/it]\n",
|
||
|
" 66%|######6 | 131/198 [03:13<01:21, 1.21s/it]\n",
|
||
|
" 67%|######6 | 132/198 [03:13<01:14, 1.12s/it]\n",
|
||
|
" 67%|######7 | 133/198 [03:15<01:24, 1.30s/it]\n",
|
||
|
" 68%|######7 | 134/198 [03:16<01:21, 1.27s/it]\n",
|
||
|
" 68%|######8 | 135/198 [03:17<01:14, 1.18s/it]\n",
|
||
|
" 69%|######8 | 136/198 [03:18<01:09, 1.12s/it]\n",
|
||
|
" 69%|######9 | 137/198 [03:20<01:09, 1.14s/it]\n",
|
||
|
" 70%|######9 | 138/198 [03:21<01:17, 1.29s/it]\n",
|
||
|
" 70%|####### | 139/198 [03:24<01:48, 1.83s/it]\n",
|
||
|
" 71%|####### | 140/198 [03:26<01:43, 1.79s/it]\n",
|
||
|
" 71%|#######1 | 141/198 [03:28<01:41, 1.78s/it]\n",
|
||
|
" 72%|#######1 | 142/198 [03:29<01:37, 1.73s/it]\n",
|
||
|
" 72%|#######2 | 143/198 [03:31<01:39, 1.81s/it]\n",
|
||
|
" 73%|#######2 | 144/198 [03:33<01:38, 1.83s/it]\n",
|
||
|
" 73%|#######3 | 145/198 [03:36<01:56, 2.20s/it]\n",
|
||
|
" 74%|#######3 | 146/198 [03:38<01:47, 2.06s/it]\n",
|
||
|
" 74%|#######4 | 147/198 [03:39<01:29, 1.75s/it]\n",
|
||
|
" 75%|#######4 | 148/198 [03:40<01:22, 1.65s/it]\n",
|
||
|
" 75%|#######5 | 149/198 [03:42<01:14, 1.52s/it]\n",
|
||
|
" 76%|#######5 | 150/198 [03:43<01:08, 1.43s/it]\n",
|
||
|
" 76%|#######6 | 151/198 [03:44<01:01, 1.31s/it]\n",
|
||
|
" 77%|#######6 | 152/198 [03:45<00:54, 1.19s/it]\n",
|
||
|
" 77%|#######7 | 153/198 [03:47<01:09, 1.55s/it]\n",
|
||
|
" 78%|#######7 | 154/198 [03:49<01:10, 1.60s/it]\n",
|
||
|
" 78%|#######8 | 155/198 [03:50<01:08, 1.59s/it]\n",
|
||
|
" 79%|#######8 | 156/198 [03:51<00:58, 1.39s/it]\n",
|
||
|
" 79%|#######9 | 157/198 [03:54<01:17, 1.89s/it]\n",
|
||
|
" 80%|#######9 | 158/198 [03:55<01:04, 1.62s/it]\n",
|
||
|
" 80%|######## | 159/198 [03:57<00:59, 1.54s/it]\n",
|
||
|
" 81%|######## | 160/198 [03:58<00:53, 1.40s/it]\n",
|
||
|
" 81%|########1 | 161/198 [04:00<00:57, 1.56s/it]\n",
|
||
|
" 82%|########1 | 162/198 [04:01<00:54, 1.51s/it]\n",
|
||
|
" 82%|########2 | 163/198 [04:03<00:55, 1.57s/it]\n",
|
||
|
" 83%|########2 | 164/198 [04:04<00:52, 1.56s/it]\n",
|
||
|
" 83%|########3 | 165/198 [04:06<00:50, 1.54s/it]\n",
|
||
|
" 84%|########3 | 166/198 [04:07<00:45, 1.42s/it]\n",
|
||
|
" 84%|########4 | 167/198 [04:09<00:44, 1.43s/it]\n",
|
||
|
" 85%|########4 | 168/198 [04:10<00:45, 1.51s/it]\n",
|
||
|
" 85%|########5 | 169/198 [04:12<00:45, 1.57s/it]\n",
|
||
|
" 86%|########5 | 170/198 [04:14<00:45, 1.63s/it]\n",
|
||
|
" 86%|########6 | 171/198 [04:15<00:40, 1.52s/it]\n",
|
||
|
" 87%|########6 | 172/198 [04:17<00:40, 1.57s/it]\n",
|
||
|
" 87%|########7 | 173/198 [04:18<00:40, 1.62s/it]\n",
|
||
|
" 88%|########7 | 174/198 [04:20<00:38, 1.59s/it]\n",
|
||
|
" 88%|########8 | 175/198 [04:21<00:33, 1.44s/it]\n",
|
||
|
" 89%|########8 | 176/198 [04:23<00:35, 1.59s/it]\n",
|
||
|
" 89%|########9 | 177/198 [04:25<00:34, 1.64s/it]\n",
|
||
|
" 90%|########9 | 178/198 [04:26<00:28, 1.43s/it]\n",
|
||
|
" 90%|######### | 179/198 [04:27<00:25, 1.36s/it]\n",
|
||
|
" 91%|######### | 180/198 [04:28<00:23, 1.29s/it]\n",
|
||
|
" 91%|#########1| 181/198 [04:31<00:30, 1.81s/it]\n",
|
||
|
" 92%|#########1| 182/198 [04:32<00:25, 1.57s/it]\n",
|
||
|
" 92%|#########2| 183/198 [04:33<00:21, 1.41s/it]\n",
|
||
|
" 93%|#########2| 184/198 [04:34<00:19, 1.36s/it]\n",
|
||
|
" 93%|#########3| 185/198 [04:36<00:17, 1.35s/it]\n",
|
||
|
" 94%|#########3| 186/198 [04:37<00:17, 1.47s/it]\n",
|
||
|
" 94%|#########4| 187/198 [04:39<00:16, 1.54s/it]\n",
|
||
|
" 95%|#########4| 188/198 [04:40<00:13, 1.40s/it]\n",
|
||
|
" 95%|#########5| 189/198 [04:41<00:11, 1.29s/it]\n",
|
||
|
" 96%|#########5| 190/198 [04:42<00:10, 1.29s/it]\n",
|
||
|
" 96%|#########6| 191/198 [04:44<00:08, 1.26s/it]\n",
|
||
|
" 97%|#########6| 192/198 [04:45<00:07, 1.17s/it]\n",
|
||
|
" 97%|#########7| 193/198 [04:46<00:06, 1.32s/it]\n",
|
||
|
" 98%|#########7| 194/198 [04:47<00:05, 1.26s/it]\n",
|
||
|
" 98%|#########8| 195/198 [04:49<00:03, 1.28s/it]\n",
|
||
|
" 99%|#########8| 196/198 [04:50<00:02, 1.27s/it]\n",
|
||
|
" 99%|#########9| 197/198 [04:51<00:01, 1.20s/it]\n",
|
||
|
"100%|##########| 198/198 [04:52<00:00, 1.21s/it]02/16/2022 00:38:36 - INFO - __main__ - Epoch 0: {'accuracy': 0.676}\n",
|
||
|
"02/16/2022 00:39:05 - INFO - __main__ - Test-set evaluation: {'accuracy': 0.636}\n",
|
||
|
"Configuration saved in out/tweet/gpt2_version_3\\config.json\n",
|
||
|
"Model weights saved in out/tweet/gpt2_version_3\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/gpt2_version_3\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/gpt2_version_3\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 198/198 [05:56<00:00, 1.80s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path gpt2 \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 24 \\\n",
|
||
|
" --per_device_eval_batch_size 24 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --freeze_model \\\n",
|
||
|
" --custom_model \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/gpt2_version_3"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# GPT2 version 4"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 00:39:07 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:39:08 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/16/2022 00:39:08 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1505.31it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\684fe667923972fb57f6b4dcb61a3c92763ad89882f3da5da9866baf14f2d60f.c7ed1f96aac49e745788faa77ba0a26a392643a50bb388b9c04ff469e555241f\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\c0c761a63004025aeadd530c4c27b860ec4ecbe8a00531233de21d865a402598.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:39:14 - INFO - __main__ - Return hidden states from model: True\n",
|
||
|
"02/16/2022 00:39:14 - INFO - __main__ - Using implementation from: GPT2ForSequenceClassificationCustom\n",
|
||
|
"loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n",
|
||
|
"All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
|
||
|
"\n",
|
||
|
"Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_hidden.weight', 'score.out_proj.weight', 'score.dense_1_input.bias', 'score.dense_2.bias', 'score.dense_2.weight', 'score.dense_1_input.weight', 'score.dense_1_hidden.bias']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"02/16/2022 00:39:16 - INFO - __main__ - Freezing model weights\n",
|
||
|
"Using pad_token, but it is not set yet.\n",
|
||
|
"02/16/2022 00:39:16 - INFO - __main__ - Set PAD token to EOS: <|endoftext|>\n",
|
||
|
"02/16/2022 00:39:16 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-5a65b7038a57b5cc.arrow\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 34.58ba/s]\n",
|
||
|
"02/16/2022 00:39:16 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-5ed4052179e59c20.arrow\n",
|
||
|
"02/16/2022 00:39:16 - INFO - __main__ - Sample 3838 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [31, 7220, 662, 12, 24071, 2488, 7220, 1303, 83, 34715, 34191, 40252, 1492, 1909, 6184, 108, 126, 253, 126, 239, 26604, 27214, 126, 253, 126, 237, 126, 120, 220, 220, 1303, 3605, 76, 13513], 'labels': 0}.\n",
|
||
|
"02/16/2022 00:39:16 - INFO - __main__ - Sample 1761 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [10919, 257, 220, 995, 13, 611, 257, 582, 1718, 257, 15647, 588, 326, 11, 661, 561, 910, 340, 373, 5969, 13, 475, 275, 14, 66, 1303, 81, 623, 283, 1076, 88, 318, 257, 2415, 428, 318, 2938, 13], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:39:16 - INFO - __main__ - Sample 1111 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [31, 7220, 2488, 7220, 612, 318, 645, 3338, 1295, 329, 15102, 287, 428, 1499, 780, 286, 661, 588, 345, 1303, 65, 1967, 220], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:39:17 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/16/2022 00:39:17 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/16/2022 00:39:17 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/16/2022 00:39:17 - INFO - __main__ - Instantaneous batch size per device = 24\n",
|
||
|
"02/16/2022 00:39:17 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 24\n",
|
||
|
"02/16/2022 00:39:17 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/16/2022 00:39:17 - INFO - __main__ - Total optimization steps = 198\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/198 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/198 [00:01<05:35, 1.70s/it]\n",
|
||
|
" 1%|1 | 2/198 [00:02<04:33, 1.39s/it]\n",
|
||
|
" 2%|1 | 3/198 [00:04<04:24, 1.35s/it]\n",
|
||
|
" 2%|2 | 4/198 [00:07<06:43, 2.08s/it]\n",
|
||
|
" 3%|2 | 5/198 [00:08<05:50, 1.81s/it]\n",
|
||
|
" 3%|3 | 6/198 [00:10<05:21, 1.68s/it]\n",
|
||
|
" 4%|3 | 7/198 [00:11<05:11, 1.63s/it]\n",
|
||
|
" 4%|4 | 8/198 [00:12<04:40, 1.48s/it]\n",
|
||
|
" 5%|4 | 9/198 [00:14<04:30, 1.43s/it]\n",
|
||
|
" 5%|5 | 10/198 [00:15<04:09, 1.32s/it]\n",
|
||
|
" 6%|5 | 11/198 [00:16<04:29, 1.44s/it]\n",
|
||
|
" 6%|6 | 12/198 [00:17<03:58, 1.28s/it]\n",
|
||
|
" 7%|6 | 13/198 [00:19<04:18, 1.40s/it]\n",
|
||
|
" 7%|7 | 14/198 [00:20<04:07, 1.35s/it]\n",
|
||
|
" 8%|7 | 15/198 [00:22<04:03, 1.33s/it]\n",
|
||
|
" 8%|8 | 16/198 [00:23<04:14, 1.40s/it]\n",
|
||
|
" 9%|8 | 17/198 [00:24<04:01, 1.33s/it]\n",
|
||
|
" 9%|9 | 18/198 [00:25<03:32, 1.18s/it]\n",
|
||
|
" 10%|9 | 19/198 [00:27<03:50, 1.29s/it]\n",
|
||
|
" 10%|# | 20/198 [00:28<04:09, 1.40s/it]\n",
|
||
|
" 11%|# | 21/198 [00:29<03:54, 1.32s/it]\n",
|
||
|
" 11%|#1 | 22/198 [00:30<03:33, 1.21s/it]\n",
|
||
|
" 12%|#1 | 23/198 [00:32<03:57, 1.36s/it]\n",
|
||
|
" 12%|#2 | 24/198 [00:33<03:45, 1.30s/it]\n",
|
||
|
" 13%|#2 | 25/198 [00:35<03:52, 1.34s/it]\n",
|
||
|
" 13%|#3 | 26/198 [00:36<03:39, 1.28s/it]\n",
|
||
|
" 14%|#3 | 27/198 [00:37<03:31, 1.23s/it]\n",
|
||
|
" 14%|#4 | 28/198 [00:39<04:07, 1.46s/it]\n",
|
||
|
" 15%|#4 | 29/198 [00:40<04:03, 1.44s/it]\n",
|
||
|
" 15%|#5 | 30/198 [00:41<03:35, 1.28s/it]\n",
|
||
|
" 16%|#5 | 31/198 [00:43<03:55, 1.41s/it]\n",
|
||
|
" 16%|#6 | 32/198 [00:45<04:08, 1.50s/it]\n",
|
||
|
" 17%|#6 | 33/198 [00:46<03:45, 1.36s/it]\n",
|
||
|
" 17%|#7 | 34/198 [00:48<04:39, 1.70s/it]\n",
|
||
|
" 18%|#7 | 35/198 [00:50<04:25, 1.63s/it]\n",
|
||
|
" 18%|#8 | 36/198 [00:51<04:02, 1.50s/it]\n",
|
||
|
" 19%|#8 | 37/198 [00:52<03:48, 1.42s/it]\n",
|
||
|
" 19%|#9 | 38/198 [00:53<03:35, 1.35s/it]\n",
|
||
|
" 20%|#9 | 39/198 [00:55<03:43, 1.40s/it]\n",
|
||
|
" 20%|## | 40/198 [00:56<03:49, 1.45s/it]\n",
|
||
|
" 21%|## | 41/198 [00:57<03:25, 1.31s/it]\n",
|
||
|
" 21%|##1 | 42/198 [00:59<03:33, 1.37s/it]\n",
|
||
|
" 22%|##1 | 43/198 [01:00<03:15, 1.26s/it]\n",
|
||
|
" 22%|##2 | 44/198 [01:01<03:06, 1.21s/it]\n",
|
||
|
" 23%|##2 | 45/198 [01:02<02:52, 1.13s/it]\n",
|
||
|
" 23%|##3 | 46/198 [01:03<02:58, 1.17s/it]\n",
|
||
|
" 24%|##3 | 47/198 [01:04<02:45, 1.09s/it]\n",
|
||
|
" 24%|##4 | 48/198 [01:05<02:39, 1.06s/it]\n",
|
||
|
" 25%|##4 | 49/198 [01:07<03:20, 1.34s/it]\n",
|
||
|
" 25%|##5 | 50/198 [01:08<03:16, 1.33s/it]\n",
|
||
|
" 26%|##5 | 51/198 [01:10<03:31, 1.44s/it]\n",
|
||
|
" 26%|##6 | 52/198 [01:12<03:31, 1.45s/it]\n",
|
||
|
" 27%|##6 | 53/198 [01:13<03:13, 1.33s/it]\n",
|
||
|
" 27%|##7 | 54/198 [01:14<02:53, 1.21s/it]\n",
|
||
|
" 28%|##7 | 55/198 [01:15<02:51, 1.20s/it]\n",
|
||
|
" 28%|##8 | 56/198 [01:16<03:11, 1.35s/it]\n",
|
||
|
" 29%|##8 | 57/198 [01:17<02:54, 1.24s/it]\n",
|
||
|
" 29%|##9 | 58/198 [01:19<02:58, 1.28s/it]\n",
|
||
|
" 30%|##9 | 59/198 [01:20<02:49, 1.22s/it]\n",
|
||
|
" 30%|### | 60/198 [01:21<02:53, 1.26s/it]\n",
|
||
|
" 31%|### | 61/198 [01:22<02:39, 1.17s/it]\n",
|
||
|
" 31%|###1 | 62/198 [01:25<03:56, 1.74s/it]\n",
|
||
|
" 32%|###1 | 63/198 [01:26<03:28, 1.54s/it]\n",
|
||
|
" 32%|###2 | 64/198 [01:29<04:29, 2.01s/it]\n",
|
||
|
" 33%|###2 | 65/198 [01:31<04:06, 1.85s/it]\n",
|
||
|
" 33%|###3 | 66/198 [01:33<04:24, 2.00s/it]\n",
|
||
|
" 34%|###3 | 67/198 [01:35<04:12, 1.93s/it]\n",
|
||
|
" 34%|###4 | 68/198 [01:36<03:33, 1.64s/it]\n",
|
||
|
" 35%|###4 | 69/198 [01:37<03:21, 1.56s/it]\n",
|
||
|
" 35%|###5 | 70/198 [01:38<02:58, 1.39s/it]\n",
|
||
|
" 36%|###5 | 71/198 [01:40<02:50, 1.34s/it]\n",
|
||
|
" 36%|###6 | 72/198 [01:41<03:01, 1.44s/it]\n",
|
||
|
" 37%|###6 | 73/198 [01:43<02:58, 1.43s/it]\n",
|
||
|
" 37%|###7 | 74/198 [01:44<02:46, 1.34s/it]\n",
|
||
|
" 38%|###7 | 75/198 [01:45<02:58, 1.45s/it]\n",
|
||
|
" 38%|###8 | 76/198 [01:47<03:17, 1.62s/it]\n",
|
||
|
" 39%|###8 | 77/198 [01:49<03:12, 1.59s/it]\n",
|
||
|
" 39%|###9 | 78/198 [01:51<03:26, 1.72s/it]\n",
|
||
|
" 40%|###9 | 79/198 [01:52<03:07, 1.57s/it]\n",
|
||
|
" 40%|#### | 80/198 [01:54<03:11, 1.62s/it]\n",
|
||
|
" 41%|#### | 81/198 [01:55<02:47, 1.43s/it]\n",
|
||
|
" 41%|####1 | 82/198 [01:57<02:51, 1.48s/it]\n",
|
||
|
" 42%|####1 | 83/198 [01:58<02:42, 1.41s/it]\n",
|
||
|
" 42%|####2 | 84/198 [01:59<02:32, 1.34s/it]\n",
|
||
|
" 43%|####2 | 85/198 [02:00<02:26, 1.30s/it]\n",
|
||
|
" 43%|####3 | 86/198 [02:01<02:13, 1.19s/it]\n",
|
||
|
" 44%|####3 | 87/198 [02:02<02:04, 1.12s/it]\n",
|
||
|
" 44%|####4 | 88/198 [02:03<02:10, 1.19s/it]\n",
|
||
|
" 45%|####4 | 89/198 [02:05<02:14, 1.23s/it]\n",
|
||
|
" 45%|####5 | 90/198 [02:08<03:13, 1.79s/it]\n",
|
||
|
" 46%|####5 | 91/198 [02:11<03:39, 2.05s/it]\n",
|
||
|
" 46%|####6 | 92/198 [02:12<03:08, 1.77s/it]\n",
|
||
|
" 47%|####6 | 93/198 [02:13<02:46, 1.59s/it]\n",
|
||
|
" 47%|####7 | 94/198 [02:15<02:57, 1.71s/it]\n",
|
||
|
" 48%|####7 | 95/198 [02:16<02:43, 1.59s/it]\n",
|
||
|
" 48%|####8 | 96/198 [02:18<02:47, 1.65s/it]\n",
|
||
|
" 49%|####8 | 97/198 [02:19<02:35, 1.54s/it]\n",
|
||
|
" 49%|####9 | 98/198 [02:20<02:23, 1.44s/it]\n",
|
||
|
" 50%|##### | 99/198 [02:22<02:36, 1.58s/it]\n",
|
||
|
" 51%|##### | 100/198 [02:25<03:02, 1.87s/it]\n",
|
||
|
" 51%|#####1 | 101/198 [02:26<02:43, 1.68s/it]\n",
|
||
|
" 52%|#####1 | 102/198 [02:28<02:43, 1.71s/it]\n",
|
||
|
" 52%|#####2 | 103/198 [02:30<02:49, 1.78s/it]\n",
|
||
|
" 53%|#####2 | 104/198 [02:32<02:48, 1.79s/it]\n",
|
||
|
" 53%|#####3 | 105/198 [02:33<02:24, 1.55s/it]\n",
|
||
|
" 54%|#####3 | 106/198 [02:34<02:12, 1.44s/it]\n",
|
||
|
" 54%|#####4 | 107/198 [02:35<01:57, 1.29s/it]\n",
|
||
|
" 55%|#####4 | 108/198 [02:36<02:05, 1.39s/it]\n",
|
||
|
" 55%|#####5 | 109/198 [02:38<02:09, 1.45s/it]\n",
|
||
|
" 56%|#####5 | 110/198 [02:40<02:15, 1.54s/it]\n",
|
||
|
" 56%|#####6 | 111/198 [02:41<02:18, 1.59s/it]\n",
|
||
|
" 57%|#####6 | 112/198 [02:43<02:19, 1.63s/it]\n",
|
||
|
" 57%|#####7 | 113/198 [02:44<02:00, 1.41s/it]\n",
|
||
|
" 58%|#####7 | 114/198 [02:45<01:54, 1.36s/it]\n",
|
||
|
" 58%|#####8 | 115/198 [02:47<02:01, 1.46s/it]\n",
|
||
|
" 59%|#####8 | 116/198 [02:49<02:05, 1.53s/it]\n",
|
||
|
" 59%|#####9 | 117/198 [02:50<01:50, 1.36s/it]\n",
|
||
|
" 60%|#####9 | 118/198 [02:51<01:48, 1.36s/it]\n",
|
||
|
" 60%|###### | 119/198 [02:53<01:56, 1.47s/it]\n",
|
||
|
" 61%|###### | 120/198 [02:54<01:42, 1.31s/it]\n",
|
||
|
" 61%|######1 | 121/198 [02:55<01:34, 1.22s/it]\n",
|
||
|
" 62%|######1 | 122/198 [02:56<01:43, 1.36s/it]\n",
|
||
|
" 62%|######2 | 123/198 [02:57<01:37, 1.30s/it]\n",
|
||
|
" 63%|######2 | 124/198 [02:59<01:32, 1.26s/it]\n",
|
||
|
" 63%|######3 | 125/198 [03:00<01:28, 1.21s/it]\n",
|
||
|
" 64%|######3 | 126/198 [03:01<01:36, 1.34s/it]\n",
|
||
|
" 64%|######4 | 127/198 [03:03<01:37, 1.37s/it]\n",
|
||
|
" 65%|######4 | 128/198 [03:04<01:32, 1.32s/it]\n",
|
||
|
" 65%|######5 | 129/198 [03:05<01:28, 1.28s/it]\n",
|
||
|
" 66%|######5 | 130/198 [03:07<01:34, 1.39s/it]\n",
|
||
|
" 66%|######6 | 131/198 [03:10<02:06, 1.89s/it]\n",
|
||
|
" 67%|######6 | 132/198 [03:11<01:53, 1.73s/it]\n",
|
||
|
" 67%|######7 | 133/198 [03:14<02:20, 2.17s/it]\n",
|
||
|
" 68%|######7 | 134/198 [03:16<02:14, 2.10s/it]\n",
|
||
|
" 68%|######8 | 135/198 [03:18<02:06, 2.00s/it]\n",
|
||
|
" 69%|######8 | 136/198 [03:19<01:44, 1.68s/it]\n",
|
||
|
" 69%|######9 | 137/198 [03:20<01:29, 1.47s/it]\n",
|
||
|
" 70%|######9 | 138/198 [03:21<01:20, 1.34s/it]\n",
|
||
|
" 70%|####### | 139/198 [03:22<01:17, 1.32s/it]\n",
|
||
|
" 71%|####### | 140/198 [03:24<01:12, 1.25s/it]\n",
|
||
|
" 71%|#######1 | 141/198 [03:26<01:39, 1.75s/it]\n",
|
||
|
" 72%|#######1 | 142/198 [03:28<01:35, 1.70s/it]\n",
|
||
|
" 72%|#######2 | 143/198 [03:29<01:22, 1.50s/it]\n",
|
||
|
" 73%|#######2 | 144/198 [03:30<01:14, 1.38s/it]\n",
|
||
|
" 73%|#######3 | 145/198 [03:31<01:07, 1.27s/it]\n",
|
||
|
" 74%|#######3 | 146/198 [03:33<01:12, 1.40s/it]\n",
|
||
|
" 74%|#######4 | 147/198 [03:34<01:06, 1.30s/it]\n",
|
||
|
" 75%|#######4 | 148/198 [03:37<01:29, 1.78s/it]\n",
|
||
|
" 75%|#######5 | 149/198 [03:38<01:18, 1.61s/it]\n",
|
||
|
" 76%|#######5 | 150/198 [03:40<01:23, 1.73s/it]\n",
|
||
|
" 76%|#######6 | 151/198 [03:41<01:13, 1.57s/it]\n",
|
||
|
" 77%|#######6 | 152/198 [03:42<01:02, 1.37s/it]\n",
|
||
|
" 77%|#######7 | 153/198 [03:43<00:57, 1.29s/it]\n",
|
||
|
" 78%|#######7 | 154/198 [03:45<01:02, 1.41s/it]\n",
|
||
|
" 78%|#######8 | 155/198 [03:47<01:05, 1.52s/it]\n",
|
||
|
" 79%|#######8 | 156/198 [03:48<00:59, 1.43s/it]\n",
|
||
|
" 79%|#######9 | 157/198 [03:50<01:02, 1.51s/it]\n",
|
||
|
" 80%|#######9 | 158/198 [03:51<00:54, 1.37s/it]\n",
|
||
|
" 80%|######## | 159/198 [03:52<00:49, 1.27s/it]\n",
|
||
|
" 81%|######## | 160/198 [03:53<00:46, 1.22s/it]\n",
|
||
|
" 81%|########1 | 161/198 [03:54<00:42, 1.15s/it]\n",
|
||
|
" 82%|########1 | 162/198 [03:55<00:41, 1.15s/it]\n",
|
||
|
" 82%|########2 | 163/198 [03:56<00:38, 1.09s/it]\n",
|
||
|
" 83%|########2 | 164/198 [03:57<00:35, 1.05s/it]\n",
|
||
|
" 83%|########3 | 165/198 [03:58<00:40, 1.23s/it]\n",
|
||
|
" 84%|########3 | 166/198 [04:00<00:39, 1.24s/it]\n",
|
||
|
" 84%|########4 | 167/198 [04:01<00:41, 1.35s/it]\n",
|
||
|
" 85%|########4 | 168/198 [04:04<00:54, 1.81s/it]\n",
|
||
|
" 85%|########5 | 169/198 [04:07<00:57, 2.00s/it]\n",
|
||
|
" 86%|########5 | 170/198 [04:09<00:57, 2.04s/it]\n",
|
||
|
" 86%|########6 | 171/198 [04:10<00:47, 1.75s/it]\n",
|
||
|
" 87%|########6 | 172/198 [04:12<00:44, 1.73s/it]\n",
|
||
|
" 87%|########7 | 173/198 [04:13<00:37, 1.49s/it]\n",
|
||
|
" 88%|########7 | 174/198 [04:14<00:38, 1.62s/it]\n",
|
||
|
" 88%|########8 | 175/198 [04:16<00:36, 1.58s/it]\n",
|
||
|
" 89%|########8 | 176/198 [04:17<00:30, 1.40s/it]\n",
|
||
|
" 89%|########9 | 177/198 [04:19<00:32, 1.54s/it]\n",
|
||
|
" 90%|########9 | 178/198 [04:20<00:27, 1.36s/it]\n",
|
||
|
" 90%|######### | 179/198 [04:21<00:27, 1.43s/it]\n",
|
||
|
" 91%|######### | 180/198 [04:22<00:23, 1.33s/it]\n",
|
||
|
" 91%|#########1| 181/198 [04:24<00:21, 1.28s/it]\n",
|
||
|
" 92%|#########1| 182/198 [04:25<00:19, 1.21s/it]\n",
|
||
|
" 92%|#########2| 183/198 [04:27<00:23, 1.57s/it]\n",
|
||
|
" 93%|#########2| 184/198 [04:28<00:20, 1.49s/it]\n",
|
||
|
" 93%|#########3| 185/198 [04:29<00:18, 1.39s/it]\n",
|
||
|
" 94%|#########3| 186/198 [04:33<00:22, 1.88s/it]\n",
|
||
|
" 94%|#########4| 187/198 [04:34<00:17, 1.63s/it]\n",
|
||
|
" 95%|#########4| 188/198 [04:35<00:16, 1.64s/it]\n",
|
||
|
" 95%|#########5| 189/198 [04:36<00:13, 1.52s/it]\n",
|
||
|
" 96%|#########5| 190/198 [04:37<00:10, 1.37s/it]\n",
|
||
|
" 96%|#########6| 191/198 [04:39<00:09, 1.41s/it]\n",
|
||
|
" 97%|#########6| 192/198 [04:41<00:09, 1.55s/it]\n",
|
||
|
" 97%|#########7| 193/198 [04:43<00:08, 1.62s/it]\n",
|
||
|
" 98%|#########7| 194/198 [04:44<00:06, 1.65s/it]\n",
|
||
|
" 98%|#########8| 195/198 [04:46<00:04, 1.63s/it]\n",
|
||
|
" 99%|#########8| 196/198 [04:47<00:03, 1.53s/it]\n",
|
||
|
" 99%|#########9| 197/198 [04:49<00:01, 1.50s/it]\n",
|
||
|
"100%|##########| 198/198 [04:49<00:00, 1.21s/it]02/16/2022 00:44:41 - INFO - __main__ - Epoch 0: {'accuracy': 0.728}\n",
|
||
|
"02/16/2022 00:45:10 - INFO - __main__ - Test-set evaluation: {'accuracy': 0.732}\n",
|
||
|
"Configuration saved in out/tweet/gpt2_version_4\\config.json\n",
|
||
|
"Model weights saved in out/tweet/gpt2_version_4\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/gpt2_version_4\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/gpt2_version_4\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 198/198 [05:53<00:00, 1.78s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path gpt2 \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 24 \\\n",
|
||
|
" --per_device_eval_batch_size 24 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --freeze_model \\\n",
|
||
|
" --custom_model \\\n",
|
||
|
" --return_hidden_states \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/gpt2_version_4"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# GPT2 version 5"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 27,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/17/2022 17:37:38 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/17/2022 17:37:39 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/17/2022 17:37:39 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1503.87it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\684fe667923972fb57f6b4dcb61a3c92763ad89882f3da5da9866baf14f2d60f.c7ed1f96aac49e745788faa77ba0a26a392643a50bb388b9c04ff469e555241f\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\c0c761a63004025aeadd530c4c27b860ec4ecbe8a00531233de21d865a402598.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\16a2f78023c8dc511294f0c97b5e10fde3ef9889ad6d11ffaa2a00714e73926e.cf2d0ecb83b6df91b3dbb53f1d1e4c311578bfd3aa0e04934215a49bf9898df0\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/gpt2/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/gpt2/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fc674cd6907b4c9e933cb42d67662436b89fa9540a1f40d7c919d0109289ad01.7d2e0efa5ca20cef4fb199382111e9d3ad96fd77b849e1d4bed13a66e1336f51\n",
|
||
|
"Model config GPT2Config {\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2LMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/17/2022 17:37:45 - INFO - __main__ - Return hidden states from model: True\n",
|
||
|
"02/17/2022 17:37:45 - INFO - __main__ - Using implementation from: GPT2ForSequenceClassificationCustom\n",
|
||
|
"loading weights file https://huggingface.co/gpt2/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\752929ace039baa8ef70fe21cdf9ab9445773d20e733cf693d667982e210837e.323c769945a351daa25546176f8208b3004b6f563438a7603e7932bae9025925\n",
|
||
|
"All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
|
||
|
"\n",
|
||
|
"Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_input.weight', 'score.dense_2.weight', 'score.dense_2.bias', 'score.out_proj.weight', 'score.dense_1_hidden.weight', 'score.dense_1_hidden.bias', 'score.dense_1_input.bias']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"02/17/2022 17:37:47 - INFO - __main__ - Freezing model weights\n",
|
||
|
"Using pad_token, but it is not set yet.\n",
|
||
|
"02/17/2022 17:37:47 - INFO - __main__ - Set PAD token to EOS: <|endoftext|>\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/5 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 5/5 [00:00<00:00, 46.33ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 5/5 [00:00<00:00, 46.33ba/s]\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 83.55ba/s]\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 100.09ba/s]\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Sample 4558 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [31, 7220, 4953, 287, 262, 3223, 329, 616, 717, 2646, 286, 2488, 7220, 543, 318, 2488, 7220, 220, 220, 220, 1303, 41364, 469, 988, 1303, 276, 26240, 23411, 6184, 95, 126, 222, 126, 242, 986, 220], 'labels': 0}.\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Sample 2249 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [1169, 1306, 1524, 614, 318, 262, 614, 329, 26420, 13, 27214, 126, 253, 126, 246, 5196, 460, 470, 892, 546, 326, 6184, 108, 126, 253, 126, 246, 3907, 1303, 14347, 1303, 1069, 4105, 220, 220, 1303, 37035, 1303, 320, 12756, 1303, 529, 669, 6042, 1303, 260, 10396, 3508, 1251, 1303, 15219], 'labels': 0}.\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Sample 1448 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [2, 1416, 81, 315, 259, 1096, 1303, 82, 5570, 1222, 696, 26, 1303, 6381, 3455, 1303, 403, 6667, 11203, 540, 1303, 354, 5233, 1303, 2256, 6615, 287, 705, 32243, 1028, 10713, 25, 9265, 6, 220, 220], 'labels': 1}.\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Instantaneous batch size per device = 32\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 32\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/17/2022 17:37:48 - INFO - __main__ - Total optimization steps = 149\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/149 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/149 [00:01<03:56, 1.60s/it]\n",
|
||
|
" 1%|1 | 2/149 [00:02<03:34, 1.46s/it]\n",
|
||
|
" 2%|2 | 3/149 [00:04<03:39, 1.50s/it]\n",
|
||
|
" 3%|2 | 4/149 [00:06<03:45, 1.56s/it]\n",
|
||
|
" 3%|3 | 5/149 [00:08<04:23, 1.83s/it]\n",
|
||
|
" 4%|4 | 6/149 [00:10<04:38, 1.95s/it]\n",
|
||
|
" 5%|4 | 7/149 [00:12<04:25, 1.87s/it]\n",
|
||
|
" 5%|5 | 8/149 [00:15<05:38, 2.40s/it]\n",
|
||
|
" 6%|6 | 9/149 [00:17<05:01, 2.15s/it]\n",
|
||
|
" 7%|6 | 10/149 [00:18<04:14, 1.83s/it]\n",
|
||
|
" 7%|7 | 11/149 [00:20<04:15, 1.85s/it]\n",
|
||
|
" 8%|8 | 12/149 [00:22<04:06, 1.80s/it]\n",
|
||
|
" 9%|8 | 13/149 [00:23<04:01, 1.78s/it]\n",
|
||
|
" 9%|9 | 14/149 [00:25<03:45, 1.67s/it]\n",
|
||
|
" 10%|# | 15/149 [00:27<04:19, 1.94s/it]\n",
|
||
|
" 11%|# | 16/149 [00:32<06:04, 2.74s/it]\n",
|
||
|
" 11%|#1 | 17/149 [00:35<06:24, 2.91s/it]\n",
|
||
|
" 12%|#2 | 18/149 [00:38<06:25, 2.94s/it]\n",
|
||
|
" 13%|#2 | 19/149 [00:42<06:51, 3.16s/it]\n",
|
||
|
" 13%|#3 | 20/149 [00:43<05:40, 2.64s/it]\n",
|
||
|
" 14%|#4 | 21/149 [00:46<05:43, 2.69s/it]\n",
|
||
|
" 15%|#4 | 22/149 [00:49<05:27, 2.58s/it]\n",
|
||
|
" 15%|#5 | 23/149 [00:50<04:55, 2.34s/it]\n",
|
||
|
" 16%|#6 | 24/149 [00:52<04:25, 2.12s/it]\n",
|
||
|
" 17%|#6 | 25/149 [00:54<04:03, 1.96s/it]\n",
|
||
|
" 17%|#7 | 26/149 [00:58<05:31, 2.70s/it]\n",
|
||
|
" 18%|#8 | 27/149 [01:00<04:47, 2.36s/it]\n",
|
||
|
" 19%|#8 | 28/149 [01:01<04:25, 2.20s/it]\n",
|
||
|
" 19%|#9 | 29/149 [01:06<05:41, 2.85s/it]\n",
|
||
|
" 20%|## | 30/149 [01:07<04:59, 2.52s/it]\n",
|
||
|
" 21%|## | 31/149 [01:10<04:42, 2.40s/it]\n",
|
||
|
" 21%|##1 | 32/149 [01:11<04:10, 2.14s/it]\n",
|
||
|
" 22%|##2 | 33/149 [01:13<03:50, 1.99s/it]\n",
|
||
|
" 23%|##2 | 34/149 [01:17<05:09, 2.69s/it]\n",
|
||
|
" 23%|##3 | 35/149 [01:20<05:08, 2.71s/it]\n",
|
||
|
" 24%|##4 | 36/149 [01:24<06:03, 3.22s/it]\n",
|
||
|
" 25%|##4 | 37/149 [01:27<05:31, 2.96s/it]\n",
|
||
|
" 26%|##5 | 38/149 [01:29<05:04, 2.75s/it]\n",
|
||
|
" 26%|##6 | 39/149 [01:31<04:47, 2.62s/it]\n",
|
||
|
" 27%|##6 | 40/149 [01:33<04:18, 2.37s/it]\n",
|
||
|
" 28%|##7 | 41/149 [01:34<03:43, 2.07s/it]\n",
|
||
|
" 28%|##8 | 42/149 [01:37<03:48, 2.13s/it]\n",
|
||
|
" 29%|##8 | 43/149 [01:38<03:22, 1.91s/it]\n",
|
||
|
" 30%|##9 | 44/149 [01:39<03:06, 1.77s/it]\n",
|
||
|
" 30%|### | 45/149 [01:41<02:49, 1.63s/it]\n",
|
||
|
" 31%|### | 46/149 [01:43<03:07, 1.82s/it]\n",
|
||
|
" 32%|###1 | 47/149 [01:44<02:52, 1.69s/it]\n",
|
||
|
" 32%|###2 | 48/149 [01:46<02:47, 1.66s/it]\n",
|
||
|
" 33%|###2 | 49/149 [01:48<02:47, 1.67s/it]\n",
|
||
|
" 34%|###3 | 50/149 [01:50<03:03, 1.86s/it]\n",
|
||
|
" 34%|###4 | 51/149 [01:53<03:50, 2.35s/it]\n",
|
||
|
" 35%|###4 | 52/149 [01:56<03:44, 2.31s/it]\n",
|
||
|
" 36%|###5 | 53/149 [01:58<03:40, 2.30s/it]\n",
|
||
|
" 36%|###6 | 54/149 [02:00<03:27, 2.18s/it]\n",
|
||
|
" 37%|###6 | 55/149 [02:03<03:38, 2.33s/it]\n",
|
||
|
" 38%|###7 | 56/149 [02:05<03:38, 2.34s/it]\n",
|
||
|
" 38%|###8 | 57/149 [02:07<03:33, 2.32s/it]\n",
|
||
|
" 39%|###8 | 58/149 [02:11<04:01, 2.65s/it]\n",
|
||
|
" 40%|###9 | 59/149 [02:13<03:44, 2.50s/it]\n",
|
||
|
" 40%|#### | 60/149 [02:15<03:38, 2.45s/it]\n",
|
||
|
" 41%|#### | 61/149 [02:17<03:11, 2.18s/it]\n",
|
||
|
" 42%|####1 | 62/149 [02:18<02:45, 1.90s/it]\n",
|
||
|
" 42%|####2 | 63/149 [02:20<02:53, 2.02s/it]\n",
|
||
|
" 43%|####2 | 64/149 [02:22<02:46, 1.96s/it]\n",
|
||
|
" 44%|####3 | 65/149 [02:23<02:32, 1.81s/it]\n",
|
||
|
" 44%|####4 | 66/149 [02:25<02:35, 1.88s/it]\n",
|
||
|
" 45%|####4 | 67/149 [02:28<02:50, 2.08s/it]\n",
|
||
|
" 46%|####5 | 68/149 [02:29<02:30, 1.86s/it]\n",
|
||
|
" 46%|####6 | 69/149 [02:32<02:47, 2.09s/it]\n",
|
||
|
" 47%|####6 | 70/149 [02:34<02:31, 1.91s/it]\n",
|
||
|
" 48%|####7 | 71/149 [02:36<02:37, 2.02s/it]\n",
|
||
|
" 48%|####8 | 72/149 [02:38<02:44, 2.14s/it]\n",
|
||
|
" 49%|####8 | 73/149 [02:41<02:47, 2.20s/it]\n",
|
||
|
" 50%|####9 | 74/149 [02:42<02:27, 1.97s/it]\n",
|
||
|
" 50%|##### | 75/149 [02:43<02:14, 1.82s/it]\n",
|
||
|
" 51%|#####1 | 76/149 [02:45<02:05, 1.72s/it]\n",
|
||
|
" 52%|#####1 | 77/149 [02:47<02:15, 1.88s/it]\n",
|
||
|
" 52%|#####2 | 78/149 [02:48<02:00, 1.70s/it]\n",
|
||
|
" 53%|#####3 | 79/149 [02:50<01:50, 1.58s/it]\n",
|
||
|
" 54%|#####3 | 80/149 [02:52<02:11, 1.90s/it]\n",
|
||
|
" 54%|#####4 | 81/149 [02:54<02:07, 1.87s/it]\n",
|
||
|
" 55%|#####5 | 82/149 [02:56<02:12, 1.98s/it]\n",
|
||
|
" 56%|#####5 | 83/149 [02:59<02:16, 2.07s/it]\n",
|
||
|
" 56%|#####6 | 84/149 [03:01<02:12, 2.03s/it]\n",
|
||
|
" 57%|#####7 | 85/149 [03:05<02:50, 2.67s/it]\n",
|
||
|
" 58%|#####7 | 86/149 [03:06<02:23, 2.27s/it]\n",
|
||
|
" 58%|#####8 | 87/149 [03:08<02:10, 2.11s/it]\n",
|
||
|
" 59%|#####9 | 88/149 [03:10<01:59, 1.95s/it]\n",
|
||
|
" 60%|#####9 | 89/149 [03:12<02:02, 2.04s/it]\n",
|
||
|
" 60%|###### | 90/149 [03:14<02:01, 2.06s/it]\n",
|
||
|
" 61%|######1 | 91/149 [03:15<01:51, 1.93s/it]\n",
|
||
|
" 62%|######1 | 92/149 [03:17<01:40, 1.76s/it]\n",
|
||
|
" 62%|######2 | 93/149 [03:18<01:29, 1.60s/it]\n",
|
||
|
" 63%|######3 | 94/149 [03:22<02:13, 2.42s/it]\n",
|
||
|
" 64%|######3 | 95/149 [03:25<02:15, 2.50s/it]\n",
|
||
|
" 64%|######4 | 96/149 [03:29<02:37, 2.96s/it]\n",
|
||
|
" 65%|######5 | 97/149 [03:31<02:17, 2.64s/it]\n",
|
||
|
" 66%|######5 | 98/149 [03:33<02:03, 2.43s/it]\n",
|
||
|
" 66%|######6 | 99/149 [03:35<01:51, 2.24s/it]\n",
|
||
|
" 67%|######7 | 100/149 [03:37<01:49, 2.24s/it]\n",
|
||
|
" 68%|######7 | 101/149 [03:38<01:35, 2.00s/it]\n",
|
||
|
" 68%|######8 | 102/149 [03:40<01:23, 1.79s/it]\n",
|
||
|
" 69%|######9 | 103/149 [03:44<01:54, 2.49s/it]\n",
|
||
|
" 70%|######9 | 104/149 [03:46<01:42, 2.29s/it]\n",
|
||
|
" 70%|####### | 105/149 [03:48<01:40, 2.28s/it]\n",
|
||
|
" 71%|#######1 | 106/149 [03:49<01:24, 1.96s/it]\n",
|
||
|
" 72%|#######1 | 107/149 [03:51<01:27, 2.08s/it]\n",
|
||
|
" 72%|#######2 | 108/149 [03:53<01:22, 2.02s/it]\n",
|
||
|
" 73%|#######3 | 109/149 [03:55<01:16, 1.92s/it]\n",
|
||
|
" 74%|#######3 | 110/149 [03:57<01:20, 2.06s/it]\n",
|
||
|
" 74%|#######4 | 111/149 [03:59<01:16, 2.01s/it]\n",
|
||
|
" 75%|#######5 | 112/149 [04:02<01:18, 2.11s/it]\n",
|
||
|
" 76%|#######5 | 113/149 [04:04<01:15, 2.10s/it]\n",
|
||
|
" 77%|#######6 | 114/149 [04:06<01:18, 2.25s/it]\n",
|
||
|
" 77%|#######7 | 115/149 [04:08<01:13, 2.15s/it]\n",
|
||
|
" 78%|#######7 | 116/149 [04:11<01:16, 2.33s/it]\n",
|
||
|
" 79%|#######8 | 117/149 [04:14<01:22, 2.59s/it]\n",
|
||
|
" 79%|#######9 | 118/149 [04:16<01:15, 2.44s/it]\n",
|
||
|
" 80%|#######9 | 119/149 [04:18<01:03, 2.11s/it]\n",
|
||
|
" 81%|######## | 120/149 [04:19<00:57, 1.97s/it]\n",
|
||
|
" 81%|########1 | 121/149 [04:21<00:51, 1.83s/it]\n",
|
||
|
" 82%|########1 | 122/149 [04:22<00:45, 1.68s/it]\n",
|
||
|
" 83%|########2 | 123/149 [04:24<00:48, 1.86s/it]\n",
|
||
|
" 83%|########3 | 124/149 [04:27<00:48, 1.94s/it]\n",
|
||
|
" 84%|########3 | 125/149 [04:28<00:42, 1.79s/it]\n",
|
||
|
" 85%|########4 | 126/149 [04:30<00:42, 1.84s/it]\n",
|
||
|
" 85%|########5 | 127/149 [04:31<00:38, 1.74s/it]\n",
|
||
|
" 86%|########5 | 128/149 [04:33<00:33, 1.58s/it]\n",
|
||
|
" 87%|########6 | 129/149 [04:35<00:35, 1.78s/it]\n",
|
||
|
" 87%|########7 | 130/149 [04:38<00:42, 2.23s/it]\n",
|
||
|
" 88%|########7 | 131/149 [04:39<00:34, 1.93s/it]\n",
|
||
|
" 89%|########8 | 132/149 [04:41<00:30, 1.77s/it]\n",
|
||
|
" 89%|########9 | 133/149 [04:43<00:29, 1.83s/it]\n",
|
||
|
" 90%|########9 | 134/149 [04:44<00:24, 1.65s/it]\n",
|
||
|
" 91%|######### | 135/149 [04:46<00:24, 1.72s/it]\n",
|
||
|
" 91%|#########1| 136/149 [04:47<00:21, 1.69s/it]\n",
|
||
|
" 92%|#########1| 137/149 [04:50<00:22, 1.86s/it]\n",
|
||
|
" 93%|#########2| 138/149 [04:51<00:19, 1.74s/it]\n",
|
||
|
" 93%|#########3| 139/149 [04:55<00:23, 2.37s/it]\n",
|
||
|
" 94%|#########3| 140/149 [04:57<00:20, 2.29s/it]\n",
|
||
|
" 95%|#########4| 141/149 [04:59<00:16, 2.06s/it]\n",
|
||
|
" 95%|#########5| 142/149 [05:00<00:13, 1.97s/it]\n",
|
||
|
" 96%|#########5| 143/149 [05:02<00:10, 1.83s/it]\n",
|
||
|
" 97%|#########6| 144/149 [05:04<00:09, 1.96s/it]\n",
|
||
|
" 97%|#########7| 145/149 [05:06<00:08, 2.01s/it]\n",
|
||
|
" 98%|#########7| 146/149 [05:08<00:05, 1.87s/it]\n",
|
||
|
" 99%|#########8| 147/149 [05:11<00:04, 2.17s/it]\n",
|
||
|
" 99%|#########9| 148/149 [05:13<00:02, 2.22s/it]\n",
|
||
|
"100%|##########| 149/149 [05:13<00:00, 1.62s/it]02/17/2022 17:43:39 - INFO - __main__ - Epoch 0: {'accuracy': 0.888}\n",
|
||
|
"02/17/2022 17:44:11 - INFO - __main__ - Test-set evaluation: {'accuracy': 0.914}\n",
|
||
|
"Configuration saved in out/tweet/gpt2_version_5\\config.json\n",
|
||
|
"Model weights saved in out/tweet/gpt2_version_5\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/gpt2_version_5\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/gpt2_version_5\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 149/149 [06:23<00:00, 2.57s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path gpt2 \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 32 \\\n",
|
||
|
" --per_device_eval_batch_size 32 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --freeze_model \\\n",
|
||
|
" --custom_model \\\n",
|
||
|
" --return_hidden_states \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/gpt2_version_5"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Roberta"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 00:45:12 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:45:12 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/16/2022 00:45:12 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1503.87it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\d53fc0fa09b8342651efd4073d75e19617b3e51287c2a535becda5808a8db287.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:45:18 - INFO - __main__ - Return hidden states from model: False\n",
|
||
|
"02/16/2022 00:45:18 - INFO - __main__ - Using implementation from: AutoModelForSequenceClassification\n",
|
||
|
"loading weights file https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7\n",
|
||
|
"Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']\n",
|
||
|
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||
|
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
||
|
"Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.bias']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"02/16/2022 00:45:20 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-9bed43ed70dc0bb2.arrow\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 35.81ba/s]\n",
|
||
|
"02/16/2022 00:45:20 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-a7293927c8abf169.arrow\n",
|
||
|
"02/16/2022 00:45:20 - INFO - __main__ - Sample 528 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 9226, 269, 16, 103, 9869, 138, 47, 33, 89, 6, 4716, 1827, 8, 787, 12105, 157, 626, 4, 1437, 1437, 849, 22122, 991, 30619, 849, 21363, 46730, 219, 2], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:45:20 - INFO - __main__ - Sample 3981 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 39398, 4056, 4333, 4056, 10674, 4056, 46, 849, 17693, 849, 16063, 1073, 5715, 849, 17827, 20168, 4183, 857, 299, 4, 35103, 849, 90, 25933, 849, 438, 4467, 849, 1794, 849, 28878, 16170, 849, 28481, 1794, 1437, 1437, 849, 28481, 3695, 4056, 7471, 4056, 18164, 1437, 2], 'labels': 0}.\n",
|
||
|
"02/16/2022 00:45:20 - INFO - __main__ - Sample 4184 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 2716, 18, 4076, 103, 849, 267, 6988, 428, 33726, 849, 1452, 10071, 849, 1452, 10071, 9029, 849, 4082, 5536, 11819, 849, 10393, 19347, 849, 37096, 1437, 1437, 849, 31518, 849, 1193, 366, 3695, 4056, 7471, 4056, 18164, 1437, 2], 'labels': 0}.\n",
|
||
|
"02/16/2022 00:45:21 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/16/2022 00:45:21 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/16/2022 00:45:21 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/16/2022 00:45:21 - INFO - __main__ - Instantaneous batch size per device = 24\n",
|
||
|
"02/16/2022 00:45:21 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 24\n",
|
||
|
"02/16/2022 00:45:21 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/16/2022 00:45:21 - INFO - __main__ - Total optimization steps = 198\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/198 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/198 [00:03<12:09, 3.70s/it]\n",
|
||
|
" 1%|1 | 2/198 [00:06<09:32, 2.92s/it]\n",
|
||
|
" 2%|1 | 3/198 [00:08<08:22, 2.57s/it]\n",
|
||
|
" 2%|2 | 4/198 [00:14<12:25, 3.84s/it]\n",
|
||
|
" 3%|2 | 5/198 [00:17<12:05, 3.76s/it]\n",
|
||
|
" 3%|3 | 6/198 [00:20<10:48, 3.38s/it]\n",
|
||
|
" 4%|3 | 7/198 [00:23<11:00, 3.46s/it]\n",
|
||
|
" 4%|4 | 8/198 [00:27<11:16, 3.56s/it]\n",
|
||
|
" 5%|4 | 9/198 [00:30<10:53, 3.46s/it]\n",
|
||
|
" 5%|5 | 10/198 [00:33<10:11, 3.25s/it]\n",
|
||
|
" 6%|5 | 11/198 [00:36<09:26, 3.03s/it]\n",
|
||
|
" 6%|6 | 12/198 [00:38<09:01, 2.91s/it]\n",
|
||
|
" 7%|6 | 13/198 [00:42<09:18, 3.02s/it]\n",
|
||
|
" 7%|7 | 14/198 [00:45<09:07, 2.98s/it]\n",
|
||
|
" 8%|7 | 15/198 [00:47<08:47, 2.88s/it]\n",
|
||
|
" 8%|8 | 16/198 [00:49<08:05, 2.67s/it]\n",
|
||
|
" 9%|8 | 17/198 [00:52<07:52, 2.61s/it]\n",
|
||
|
" 9%|9 | 18/198 [00:55<08:38, 2.88s/it]\n",
|
||
|
" 10%|9 | 19/198 [00:58<08:41, 2.91s/it]\n",
|
||
|
" 10%|# | 20/198 [01:01<08:23, 2.83s/it]\n",
|
||
|
" 11%|# | 21/198 [01:04<08:14, 2.79s/it]\n",
|
||
|
" 11%|#1 | 22/198 [01:07<08:45, 2.99s/it]\n",
|
||
|
" 12%|#1 | 23/198 [01:11<09:15, 3.17s/it]\n",
|
||
|
" 12%|#2 | 24/198 [01:17<11:41, 4.03s/it]\n",
|
||
|
" 13%|#2 | 25/198 [01:21<11:51, 4.11s/it]\n",
|
||
|
" 13%|#3 | 26/198 [01:25<11:24, 3.98s/it]\n",
|
||
|
" 14%|#3 | 27/198 [01:27<09:59, 3.50s/it]\n",
|
||
|
" 14%|#4 | 28/198 [01:30<09:05, 3.21s/it]\n",
|
||
|
" 15%|#4 | 29/198 [01:32<08:37, 3.06s/it]\n",
|
||
|
" 15%|#5 | 30/198 [01:35<08:19, 2.97s/it]\n",
|
||
|
" 16%|#5 | 31/198 [01:39<09:09, 3.29s/it]\n",
|
||
|
" 16%|#6 | 32/198 [01:42<08:22, 3.03s/it]\n",
|
||
|
" 17%|#6 | 33/198 [01:44<07:45, 2.82s/it]\n",
|
||
|
" 17%|#7 | 34/198 [01:47<07:36, 2.78s/it]\n",
|
||
|
" 18%|#7 | 35/198 [01:50<07:55, 2.92s/it]\n",
|
||
|
" 18%|#8 | 36/198 [01:53<07:49, 2.90s/it]\n",
|
||
|
" 19%|#8 | 37/198 [01:59<10:23, 3.87s/it]\n",
|
||
|
" 19%|#9 | 38/198 [02:03<10:44, 4.03s/it]\n",
|
||
|
" 20%|#9 | 39/198 [02:06<09:27, 3.57s/it]\n",
|
||
|
" 20%|## | 40/198 [02:09<09:31, 3.62s/it]\n",
|
||
|
" 21%|## | 41/198 [02:13<09:30, 3.63s/it]\n",
|
||
|
" 21%|##1 | 42/198 [02:16<08:53, 3.42s/it]\n",
|
||
|
" 22%|##1 | 43/198 [02:22<10:45, 4.17s/it]\n",
|
||
|
" 22%|##2 | 44/198 [02:26<10:21, 4.04s/it]\n",
|
||
|
" 23%|##2 | 45/198 [02:28<09:18, 3.65s/it]\n",
|
||
|
" 23%|##3 | 46/198 [02:31<08:46, 3.46s/it]\n",
|
||
|
" 24%|##3 | 47/198 [02:34<07:59, 3.18s/it]\n",
|
||
|
" 24%|##4 | 48/198 [02:36<07:27, 2.98s/it]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" 25%|##4 | 49/198 [02:40<07:56, 3.20s/it]\n",
|
||
|
" 25%|##5 | 50/198 [02:43<07:30, 3.04s/it]\n",
|
||
|
" 26%|##5 | 51/198 [02:47<07:55, 3.24s/it]\n",
|
||
|
" 26%|##6 | 52/198 [02:50<08:15, 3.40s/it]\n",
|
||
|
" 27%|##6 | 53/198 [02:53<07:46, 3.22s/it]\n",
|
||
|
" 27%|##7 | 54/198 [02:55<06:56, 2.90s/it]\n",
|
||
|
" 28%|##7 | 55/198 [03:01<09:10, 3.85s/it]\n",
|
||
|
" 28%|##8 | 56/198 [03:04<08:23, 3.55s/it]\n",
|
||
|
" 29%|##8 | 57/198 [03:08<08:11, 3.49s/it]\n",
|
||
|
" 29%|##9 | 58/198 [03:11<07:48, 3.35s/it]\n",
|
||
|
" 30%|##9 | 59/198 [03:15<08:45, 3.78s/it]\n",
|
||
|
" 30%|### | 60/198 [03:18<07:58, 3.47s/it]\n",
|
||
|
" 31%|### | 61/198 [03:24<09:37, 4.22s/it]\n",
|
||
|
" 31%|###1 | 62/198 [03:27<08:49, 3.89s/it]\n",
|
||
|
" 32%|###1 | 63/198 [03:30<07:56, 3.53s/it]\n",
|
||
|
" 32%|###2 | 64/198 [03:33<07:37, 3.41s/it]\n",
|
||
|
" 33%|###2 | 65/198 [03:37<07:43, 3.49s/it]\n",
|
||
|
" 33%|###3 | 66/198 [03:39<06:54, 3.14s/it]\n",
|
||
|
" 34%|###3 | 67/198 [03:42<06:38, 3.04s/it]\n",
|
||
|
" 34%|###4 | 68/198 [03:46<07:15, 3.35s/it]\n",
|
||
|
" 35%|###4 | 69/198 [03:50<07:50, 3.65s/it]\n",
|
||
|
" 35%|###5 | 70/198 [03:54<07:33, 3.55s/it]\n",
|
||
|
" 36%|###5 | 71/198 [03:57<07:24, 3.50s/it]\n",
|
||
|
" 36%|###6 | 72/198 [03:59<06:45, 3.22s/it]\n",
|
||
|
" 37%|###6 | 73/198 [04:04<07:14, 3.47s/it]\n",
|
||
|
" 37%|###7 | 74/198 [04:06<06:32, 3.16s/it]\n",
|
||
|
" 38%|###7 | 75/198 [04:08<06:00, 2.93s/it]\n",
|
||
|
" 38%|###8 | 76/198 [04:11<05:40, 2.79s/it]\n",
|
||
|
" 39%|###8 | 77/198 [04:15<06:27, 3.20s/it]\n",
|
||
|
" 39%|###9 | 78/198 [04:18<06:19, 3.16s/it]\n",
|
||
|
" 40%|###9 | 79/198 [04:21<05:55, 2.99s/it]\n",
|
||
|
" 40%|#### | 80/198 [04:24<05:56, 3.02s/it]\n",
|
||
|
" 41%|#### | 81/198 [04:28<06:44, 3.46s/it]\n",
|
||
|
" 41%|####1 | 82/198 [04:31<06:15, 3.24s/it]\n",
|
||
|
" 42%|####1 | 83/198 [04:34<05:59, 3.12s/it]\n",
|
||
|
" 42%|####2 | 84/198 [04:36<05:35, 2.94s/it]\n",
|
||
|
" 43%|####2 | 85/198 [04:42<07:11, 3.82s/it]\n",
|
||
|
" 43%|####3 | 86/198 [04:44<06:13, 3.34s/it]\n",
|
||
|
" 44%|####3 | 87/198 [04:47<05:43, 3.09s/it]\n",
|
||
|
" 44%|####4 | 88/198 [04:49<05:12, 2.84s/it]\n",
|
||
|
" 45%|####4 | 89/198 [04:52<05:11, 2.86s/it]\n",
|
||
|
" 45%|####5 | 90/198 [04:56<05:52, 3.27s/it]\n",
|
||
|
" 46%|####5 | 91/198 [04:59<05:29, 3.08s/it]\n",
|
||
|
" 46%|####6 | 92/198 [05:02<05:28, 3.10s/it]\n",
|
||
|
" 47%|####6 | 93/198 [05:04<05:01, 2.87s/it]\n",
|
||
|
" 47%|####7 | 94/198 [05:07<04:58, 2.87s/it]\n",
|
||
|
" 48%|####7 | 95/198 [05:11<05:34, 3.25s/it]\n",
|
||
|
" 48%|####8 | 96/198 [05:15<05:44, 3.38s/it]\n",
|
||
|
" 49%|####8 | 97/198 [05:19<05:49, 3.46s/it]\n",
|
||
|
" 49%|####9 | 98/198 [05:22<05:51, 3.51s/it]\n",
|
||
|
" 50%|##### | 99/198 [05:25<05:16, 3.20s/it]\n",
|
||
|
" 51%|##### | 100/198 [05:29<05:27, 3.34s/it]\n",
|
||
|
" 51%|#####1 | 101/198 [05:32<05:35, 3.46s/it]\n",
|
||
|
" 52%|#####1 | 102/198 [05:35<05:14, 3.28s/it]\n",
|
||
|
" 52%|#####2 | 103/198 [05:37<04:40, 2.95s/it]\n",
|
||
|
" 53%|#####2 | 104/198 [05:40<04:40, 2.98s/it]\n",
|
||
|
" 53%|#####3 | 105/198 [05:42<04:13, 2.73s/it]\n",
|
||
|
" 54%|#####3 | 106/198 [05:45<04:12, 2.74s/it]\n",
|
||
|
" 54%|#####4 | 107/198 [05:48<04:05, 2.70s/it]\n",
|
||
|
" 55%|#####4 | 108/198 [05:51<04:05, 2.73s/it]\n",
|
||
|
" 55%|#####5 | 109/198 [05:54<04:08, 2.80s/it]\n",
|
||
|
" 56%|#####5 | 110/198 [05:57<04:20, 2.96s/it]\n",
|
||
|
" 56%|#####6 | 111/198 [06:00<04:24, 3.04s/it]\n",
|
||
|
" 57%|#####6 | 112/198 [06:04<04:37, 3.23s/it]\n",
|
||
|
" 57%|#####7 | 113/198 [06:07<04:25, 3.12s/it]\n",
|
||
|
" 58%|#####7 | 114/198 [06:09<04:12, 3.01s/it]\n",
|
||
|
" 58%|#####8 | 115/198 [06:13<04:23, 3.18s/it]\n",
|
||
|
" 59%|#####8 | 116/198 [06:16<04:19, 3.16s/it]\n",
|
||
|
" 59%|#####9 | 117/198 [06:19<04:11, 3.11s/it]\n",
|
||
|
" 60%|#####9 | 118/198 [06:23<04:26, 3.33s/it]\n",
|
||
|
" 60%|###### | 119/198 [06:25<03:57, 3.00s/it]\n",
|
||
|
" 61%|###### | 120/198 [06:29<04:09, 3.20s/it]\n",
|
||
|
" 61%|######1 | 121/198 [06:33<04:17, 3.34s/it]\n",
|
||
|
" 62%|######1 | 122/198 [06:39<05:14, 4.14s/it]\n",
|
||
|
" 62%|######2 | 123/198 [06:43<05:09, 4.12s/it]\n",
|
||
|
" 63%|######2 | 124/198 [06:45<04:35, 3.73s/it]\n",
|
||
|
" 63%|######3 | 125/198 [06:48<04:11, 3.44s/it]\n",
|
||
|
" 64%|######3 | 126/198 [06:51<03:46, 3.14s/it]\n",
|
||
|
" 64%|######4 | 127/198 [06:54<03:53, 3.29s/it]\n",
|
||
|
" 65%|######4 | 128/198 [06:57<03:39, 3.14s/it]\n",
|
||
|
" 65%|######5 | 129/198 [07:00<03:33, 3.09s/it]\n",
|
||
|
" 66%|######5 | 130/198 [07:03<03:27, 3.05s/it]\n",
|
||
|
" 66%|######6 | 131/198 [07:06<03:14, 2.91s/it]\n",
|
||
|
" 67%|######6 | 132/198 [07:08<03:02, 2.77s/it]\n",
|
||
|
" 67%|######7 | 133/198 [07:10<02:52, 2.65s/it]\n",
|
||
|
" 68%|######7 | 134/198 [07:13<02:43, 2.56s/it]\n",
|
||
|
" 68%|######8 | 135/198 [07:16<02:55, 2.78s/it]\n",
|
||
|
" 69%|######8 | 136/198 [07:19<02:48, 2.71s/it]\n",
|
||
|
" 69%|######9 | 137/198 [07:22<03:04, 3.02s/it]\n",
|
||
|
" 70%|######9 | 138/198 [07:28<03:43, 3.72s/it]\n",
|
||
|
" 70%|####### | 139/198 [07:31<03:29, 3.56s/it]\n",
|
||
|
" 71%|####### | 140/198 [07:34<03:26, 3.56s/it]\n",
|
||
|
" 71%|#######1 | 141/198 [07:37<03:12, 3.37s/it]\n",
|
||
|
" 72%|#######1 | 142/198 [07:41<03:12, 3.44s/it]\n",
|
||
|
" 72%|#######2 | 143/198 [07:45<03:12, 3.50s/it]\n",
|
||
|
" 73%|#######2 | 144/198 [07:47<02:56, 3.27s/it]\n",
|
||
|
" 73%|#######3 | 145/198 [07:50<02:46, 3.14s/it]\n",
|
||
|
" 74%|#######3 | 146/198 [07:53<02:36, 3.00s/it]\n",
|
||
|
" 74%|#######4 | 147/198 [07:55<02:25, 2.85s/it]\n",
|
||
|
" 75%|#######4 | 148/198 [07:59<02:34, 3.08s/it]\n",
|
||
|
" 75%|#######5 | 149/198 [08:03<02:40, 3.27s/it]\n",
|
||
|
" 76%|#######5 | 150/198 [08:05<02:23, 2.99s/it]\n",
|
||
|
" 76%|#######6 | 151/198 [08:09<02:30, 3.21s/it]\n",
|
||
|
" 77%|#######6 | 152/198 [08:12<02:33, 3.34s/it]\n",
|
||
|
" 77%|#######7 | 153/198 [08:15<02:21, 3.14s/it]\n",
|
||
|
" 78%|#######7 | 154/198 [08:19<02:25, 3.31s/it]\n",
|
||
|
" 78%|#######8 | 155/198 [08:21<02:12, 3.08s/it]\n",
|
||
|
" 79%|#######8 | 156/198 [08:24<01:58, 2.81s/it]\n",
|
||
|
" 79%|#######9 | 157/198 [08:27<02:07, 3.10s/it]\n",
|
||
|
" 80%|#######9 | 158/198 [08:30<02:00, 3.01s/it]\n",
|
||
|
" 80%|######## | 159/198 [08:35<02:17, 3.52s/it]\n",
|
||
|
" 81%|######## | 160/198 [08:37<02:01, 3.20s/it]\n",
|
||
|
" 81%|########1 | 161/198 [08:40<01:51, 3.02s/it]\n",
|
||
|
" 82%|########1 | 162/198 [08:45<02:10, 3.62s/it]\n",
|
||
|
" 82%|########2 | 163/198 [08:48<02:02, 3.51s/it]\n",
|
||
|
" 83%|########2 | 164/198 [08:52<02:02, 3.59s/it]\n",
|
||
|
" 83%|########3 | 165/198 [08:58<02:22, 4.31s/it]\n",
|
||
|
" 84%|########3 | 166/198 [09:00<02:00, 3.75s/it]\n",
|
||
|
" 84%|########4 | 167/198 [09:03<01:42, 3.30s/it]\n",
|
||
|
" 85%|########4 | 168/198 [09:06<01:41, 3.39s/it]\n",
|
||
|
" 85%|########5 | 169/198 [09:09<01:33, 3.21s/it]\n",
|
||
|
" 86%|########5 | 170/198 [09:12<01:25, 3.06s/it]\n",
|
||
|
" 86%|########6 | 171/198 [09:17<01:38, 3.67s/it]\n",
|
||
|
" 87%|########6 | 172/198 [09:19<01:27, 3.38s/it]\n",
|
||
|
" 87%|########7 | 173/198 [09:25<01:41, 4.07s/it]\n",
|
||
|
" 88%|########7 | 174/198 [09:28<01:28, 3.68s/it]\n",
|
||
|
" 88%|########8 | 175/198 [09:31<01:22, 3.60s/it]\n",
|
||
|
" 89%|########8 | 176/198 [09:35<01:21, 3.69s/it]\n",
|
||
|
" 89%|########9 | 177/198 [09:38<01:09, 3.31s/it]\n",
|
||
|
" 90%|########9 | 178/198 [09:40<00:59, 2.97s/it]\n",
|
||
|
" 90%|######### | 179/198 [09:43<01:00, 3.17s/it]\n",
|
||
|
" 91%|######### | 180/198 [09:47<00:58, 3.25s/it]\n",
|
||
|
" 91%|#########1| 181/198 [09:50<00:52, 3.07s/it]\n",
|
||
|
" 92%|#########1| 182/198 [09:53<00:49, 3.10s/it]\n",
|
||
|
" 92%|#########2| 183/198 [09:55<00:41, 2.79s/it]\n",
|
||
|
" 93%|#########2| 184/198 [09:58<00:42, 3.03s/it]\n",
|
||
|
" 93%|#########3| 185/198 [10:01<00:36, 2.84s/it]\n",
|
||
|
" 94%|#########3| 186/198 [10:03<00:31, 2.66s/it]\n",
|
||
|
" 94%|#########4| 187/198 [10:05<00:27, 2.53s/it]\n",
|
||
|
" 95%|#########4| 188/198 [10:08<00:25, 2.57s/it]\n",
|
||
|
" 95%|#########5| 189/198 [10:13<00:29, 3.25s/it]\n",
|
||
|
" 96%|#########5| 190/198 [10:16<00:26, 3.36s/it]\n",
|
||
|
" 96%|#########6| 191/198 [10:19<00:21, 3.03s/it]\n",
|
||
|
" 97%|#########6| 192/198 [10:22<00:18, 3.13s/it]\n",
|
||
|
" 97%|#########7| 193/198 [10:24<00:14, 2.91s/it]\n",
|
||
|
" 98%|#########7| 194/198 [10:28<00:12, 3.13s/it]\n",
|
||
|
" 98%|#########8| 195/198 [10:31<00:09, 3.06s/it]\n",
|
||
|
" 99%|#########8| 196/198 [10:36<00:07, 3.64s/it]\n",
|
||
|
" 99%|#########9| 197/198 [10:39<00:03, 3.58s/it]\n",
|
||
|
"100%|##########| 198/198 [10:41<00:00, 2.92s/it]02/16/2022 00:56:30 - INFO - __main__ - Epoch 0: {'accuracy': 0.948}\n",
|
||
|
"02/16/2022 00:56:53 - INFO - __main__ - Test-set evaluation: {'accuracy': 0.942}\n",
|
||
|
"Configuration saved in out/tweet/roberta\\config.json\n",
|
||
|
"Model weights saved in out/tweet/roberta\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/roberta\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/roberta\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 198/198 [11:32<00:00, 3.50s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path roberta-base \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 24 \\\n",
|
||
|
" --per_device_eval_batch_size 24 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/roberta"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Roberta version 2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 00:56:55 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:56:56 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/16/2022 00:56:56 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1504.59it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\d53fc0fa09b8342651efd4073d75e19617b3e51287c2a535becda5808a8db287.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/16/2022 00:57:02 - INFO - __main__ - Return hidden states from model: False\n",
|
||
|
"02/16/2022 00:57:02 - INFO - __main__ - Using implementation from: AutoModelForSequenceClassification\n",
|
||
|
"loading weights file https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7\n",
|
||
|
"Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'roberta.pooler.dense.bias', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']\n",
|
||
|
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||
|
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
||
|
"Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.dense.weight']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"02/16/2022 00:57:03 - INFO - __main__ - Freezing model weights\n",
|
||
|
"02/16/2022 00:57:03 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-12e8873686c6be8d.arrow\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 35.81ba/s]\n",
|
||
|
"02/16/2022 00:57:03 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-6af3944f94b779cb.arrow\n",
|
||
|
"02/16/2022 00:57:03 - INFO - __main__ - Sample 2678 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 118, 437, 98, 1437, 1437, 8, 849, 6504, 39264, 122, 14, 111, 849, 3707, 9856, 1635, 1437, 2], 'labels': 0}.\n",
|
||
|
"02/16/2022 00:57:03 - INFO - __main__ - Sample 1289 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 1322, 47, 849, 14178, 359, 3914, 131, 619, 101, 952, 7258, 4056, 7471, 4056, 18164, 32, 30309, 154, 15, 47, 116, 4161, 1437, 849, 4903, 21210, 849, 90, 20564, 849, 119, 40879, 3695, 4056, 7471, 4056, 18164, 1437, 2], 'labels': 1}.\n",
|
||
|
"02/16/2022 00:57:03 - INFO - __main__ - Sample 2660 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 21714, 3308, 7512, 13, 127, 15382, 186, 11, 885, 4575, 1437, 1437, 2], 'labels': 0}.\n",
|
||
|
"02/16/2022 00:57:04 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/16/2022 00:57:04 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/16/2022 00:57:04 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/16/2022 00:57:04 - INFO - __main__ - Instantaneous batch size per device = 24\n",
|
||
|
"02/16/2022 00:57:04 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 24\n",
|
||
|
"02/16/2022 00:57:04 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/16/2022 00:57:04 - INFO - __main__ - Total optimization steps = 198\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/198 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/198 [00:00<02:44, 1.20it/s]\n",
|
||
|
" 1%|1 | 2/198 [00:01<02:38, 1.24it/s]\n",
|
||
|
" 2%|1 | 3/198 [00:02<03:22, 1.04s/it]\n",
|
||
|
" 2%|2 | 4/198 [00:03<02:45, 1.17it/s]\n",
|
||
|
" 3%|2 | 5/198 [00:04<03:19, 1.03s/it]\n",
|
||
|
" 3%|3 | 6/198 [00:06<03:31, 1.10s/it]\n",
|
||
|
" 4%|3 | 7/198 [00:06<03:15, 1.03s/it]\n",
|
||
|
" 4%|4 | 8/198 [00:08<03:43, 1.18s/it]\n",
|
||
|
" 5%|4 | 9/198 [00:09<03:29, 1.11s/it]\n",
|
||
|
" 5%|5 | 10/198 [00:10<03:05, 1.01it/s]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" 6%|5 | 11/198 [00:10<02:55, 1.06it/s]\n",
|
||
|
" 6%|6 | 12/198 [00:11<02:57, 1.05it/s]\n",
|
||
|
" 7%|6 | 13/198 [00:12<02:56, 1.05it/s]\n",
|
||
|
" 7%|7 | 14/198 [00:13<02:41, 1.14it/s]\n",
|
||
|
" 8%|7 | 15/198 [00:14<02:49, 1.08it/s]\n",
|
||
|
" 8%|8 | 16/198 [00:15<02:47, 1.09it/s]\n",
|
||
|
" 9%|8 | 17/198 [00:16<02:52, 1.05it/s]\n",
|
||
|
" 9%|9 | 18/198 [00:18<03:28, 1.16s/it]\n",
|
||
|
" 10%|9 | 19/198 [00:19<03:23, 1.14s/it]\n",
|
||
|
" 10%|# | 20/198 [00:20<02:59, 1.01s/it]\n",
|
||
|
" 11%|# | 21/198 [00:21<03:18, 1.12s/it]\n",
|
||
|
" 11%|#1 | 22/198 [00:22<03:02, 1.03s/it]\n",
|
||
|
" 12%|#1 | 23/198 [00:23<02:52, 1.01it/s]\n",
|
||
|
" 12%|#2 | 24/198 [00:24<03:08, 1.08s/it]\n",
|
||
|
" 13%|#2 | 25/198 [00:25<03:17, 1.14s/it]\n",
|
||
|
" 13%|#3 | 26/198 [00:27<03:28, 1.21s/it]\n",
|
||
|
" 14%|#3 | 27/198 [00:27<03:05, 1.09s/it]\n",
|
||
|
" 14%|#4 | 28/198 [00:28<02:48, 1.01it/s]\n",
|
||
|
" 15%|#4 | 29/198 [00:29<02:37, 1.08it/s]\n",
|
||
|
" 15%|#5 | 30/198 [00:30<02:36, 1.07it/s]\n",
|
||
|
" 16%|#5 | 31/198 [00:31<02:28, 1.12it/s]\n",
|
||
|
" 16%|#6 | 32/198 [00:32<02:32, 1.09it/s]\n",
|
||
|
" 17%|#6 | 33/198 [00:32<02:27, 1.12it/s]\n",
|
||
|
" 17%|#7 | 34/198 [00:34<02:40, 1.02it/s]\n",
|
||
|
" 18%|#7 | 35/198 [00:35<03:02, 1.12s/it]\n",
|
||
|
" 18%|#8 | 36/198 [00:36<03:09, 1.17s/it]\n",
|
||
|
" 19%|#8 | 37/198 [00:37<02:53, 1.08s/it]\n",
|
||
|
" 19%|#9 | 38/198 [00:38<02:37, 1.02it/s]\n",
|
||
|
" 20%|#9 | 39/198 [00:39<02:50, 1.08s/it]\n",
|
||
|
" 20%|## | 40/198 [00:41<02:56, 1.12s/it]\n",
|
||
|
" 21%|## | 41/198 [00:42<03:00, 1.15s/it]\n",
|
||
|
" 21%|##1 | 42/198 [00:42<02:36, 1.00s/it]\n",
|
||
|
" 22%|##1 | 43/198 [00:44<02:58, 1.15s/it]\n",
|
||
|
" 22%|##2 | 44/198 [00:45<03:13, 1.26s/it]\n",
|
||
|
" 23%|##2 | 45/198 [00:46<03:03, 1.20s/it]\n",
|
||
|
" 23%|##3 | 46/198 [00:47<02:45, 1.09s/it]\n",
|
||
|
" 24%|##3 | 47/198 [00:49<02:54, 1.15s/it]\n",
|
||
|
" 24%|##4 | 48/198 [00:49<02:36, 1.05s/it]\n",
|
||
|
" 25%|##4 | 49/198 [00:50<02:33, 1.03s/it]\n",
|
||
|
" 25%|##5 | 50/198 [00:52<03:07, 1.27s/it]\n",
|
||
|
" 26%|##5 | 51/198 [00:53<02:44, 1.12s/it]\n",
|
||
|
" 26%|##6 | 52/198 [00:54<02:46, 1.14s/it]\n",
|
||
|
" 27%|##6 | 53/198 [00:55<02:49, 1.17s/it]\n",
|
||
|
" 27%|##7 | 54/198 [00:57<02:51, 1.19s/it]\n",
|
||
|
" 28%|##7 | 55/198 [00:58<02:57, 1.24s/it]\n",
|
||
|
" 28%|##8 | 56/198 [00:59<02:34, 1.09s/it]\n",
|
||
|
" 29%|##8 | 57/198 [01:01<03:30, 1.49s/it]\n",
|
||
|
" 29%|##9 | 58/198 [01:02<03:20, 1.43s/it]\n",
|
||
|
" 30%|##9 | 59/198 [01:04<03:12, 1.39s/it]\n",
|
||
|
" 30%|### | 60/198 [01:05<02:50, 1.23s/it]\n",
|
||
|
" 31%|### | 61/198 [01:06<02:34, 1.13s/it]\n",
|
||
|
" 31%|###1 | 62/198 [01:06<02:15, 1.00it/s]\n",
|
||
|
" 32%|###1 | 63/198 [01:07<02:09, 1.04it/s]\n",
|
||
|
" 32%|###2 | 64/198 [01:08<02:01, 1.10it/s]\n",
|
||
|
" 33%|###2 | 65/198 [01:09<02:25, 1.09s/it]\n",
|
||
|
" 33%|###3 | 66/198 [01:12<03:09, 1.44s/it]\n",
|
||
|
" 34%|###3 | 67/198 [01:13<02:54, 1.33s/it]\n",
|
||
|
" 34%|###4 | 68/198 [01:15<03:22, 1.56s/it]\n",
|
||
|
" 35%|###4 | 69/198 [01:16<03:17, 1.53s/it]\n",
|
||
|
" 35%|###5 | 70/198 [01:17<03:02, 1.43s/it]\n",
|
||
|
" 36%|###5 | 71/198 [01:18<02:39, 1.25s/it]\n",
|
||
|
" 36%|###6 | 72/198 [01:20<02:41, 1.28s/it]\n",
|
||
|
" 37%|###6 | 73/198 [01:22<03:23, 1.62s/it]\n",
|
||
|
" 37%|###7 | 74/198 [01:23<02:52, 1.39s/it]\n",
|
||
|
" 38%|###7 | 75/198 [01:24<02:34, 1.26s/it]\n",
|
||
|
" 38%|###8 | 76/198 [01:25<02:35, 1.27s/it]\n",
|
||
|
" 39%|###8 | 77/198 [01:26<02:34, 1.28s/it]\n",
|
||
|
" 39%|###9 | 78/198 [01:28<02:48, 1.40s/it]\n",
|
||
|
" 40%|###9 | 79/198 [01:29<02:21, 1.19s/it]\n",
|
||
|
" 40%|#### | 80/198 [01:30<02:25, 1.23s/it]\n",
|
||
|
" 41%|#### | 81/198 [01:31<02:13, 1.14s/it]\n",
|
||
|
" 41%|####1 | 82/198 [01:32<01:58, 1.03s/it]\n",
|
||
|
" 42%|####1 | 83/198 [01:33<01:54, 1.00it/s]\n",
|
||
|
" 42%|####2 | 84/198 [01:34<01:49, 1.04it/s]\n",
|
||
|
" 43%|####2 | 85/198 [01:35<01:58, 1.05s/it]\n",
|
||
|
" 43%|####3 | 86/198 [01:37<02:40, 1.43s/it]\n",
|
||
|
" 44%|####3 | 87/198 [01:39<02:42, 1.46s/it]\n",
|
||
|
" 44%|####4 | 88/198 [01:40<02:29, 1.36s/it]\n",
|
||
|
" 45%|####4 | 89/198 [01:41<02:11, 1.21s/it]\n",
|
||
|
" 45%|####5 | 90/198 [01:42<01:56, 1.08s/it]\n",
|
||
|
" 46%|####5 | 91/198 [01:42<01:43, 1.04it/s]\n",
|
||
|
" 46%|####6 | 92/198 [01:43<01:44, 1.02it/s]\n",
|
||
|
" 47%|####6 | 93/198 [01:45<02:04, 1.18s/it]\n",
|
||
|
" 47%|####7 | 94/198 [01:46<01:52, 1.08s/it]\n",
|
||
|
" 48%|####7 | 95/198 [01:47<01:58, 1.15s/it]\n",
|
||
|
" 48%|####8 | 96/198 [01:48<02:04, 1.22s/it]\n",
|
||
|
" 49%|####8 | 97/198 [01:49<01:54, 1.13s/it]\n",
|
||
|
" 49%|####9 | 98/198 [01:50<01:42, 1.02s/it]\n",
|
||
|
" 50%|##### | 99/198 [01:51<01:41, 1.02s/it]\n",
|
||
|
" 51%|##### | 100/198 [01:53<01:49, 1.12s/it]\n",
|
||
|
" 51%|#####1 | 101/198 [01:53<01:38, 1.02s/it]\n",
|
||
|
" 52%|#####1 | 102/198 [01:54<01:29, 1.08it/s]\n",
|
||
|
" 52%|#####2 | 103/198 [01:55<01:42, 1.08s/it]\n",
|
||
|
" 53%|#####2 | 104/198 [01:57<01:45, 1.12s/it]\n",
|
||
|
" 53%|#####3 | 105/198 [01:58<01:40, 1.08s/it]\n",
|
||
|
" 54%|#####3 | 106/198 [01:59<01:45, 1.14s/it]\n",
|
||
|
" 54%|#####4 | 107/198 [02:00<01:47, 1.18s/it]\n",
|
||
|
" 55%|#####4 | 108/198 [02:01<01:37, 1.09s/it]\n",
|
||
|
" 55%|#####5 | 109/198 [02:02<01:39, 1.11s/it]\n",
|
||
|
" 56%|#####5 | 110/198 [02:03<01:36, 1.09s/it]\n",
|
||
|
" 56%|#####6 | 111/198 [02:05<01:38, 1.14s/it]\n",
|
||
|
" 57%|#####6 | 112/198 [02:05<01:27, 1.02s/it]\n",
|
||
|
" 57%|#####7 | 113/198 [02:06<01:24, 1.00it/s]\n",
|
||
|
" 58%|#####7 | 114/198 [02:08<01:31, 1.08s/it]\n",
|
||
|
" 58%|#####8 | 115/198 [02:08<01:25, 1.03s/it]\n",
|
||
|
" 59%|#####8 | 116/198 [02:09<01:22, 1.01s/it]\n",
|
||
|
" 59%|#####9 | 117/198 [02:10<01:16, 1.05it/s]\n",
|
||
|
" 60%|#####9 | 118/198 [02:11<01:10, 1.13it/s]\n",
|
||
|
" 60%|###### | 119/198 [02:12<01:11, 1.11it/s]\n",
|
||
|
" 61%|###### | 120/198 [02:14<01:45, 1.36s/it]\n",
|
||
|
" 61%|######1 | 121/198 [02:15<01:27, 1.13s/it]\n",
|
||
|
" 62%|######1 | 122/198 [02:16<01:19, 1.04s/it]\n",
|
||
|
" 62%|######2 | 123/198 [02:17<01:24, 1.13s/it]\n",
|
||
|
" 63%|######2 | 124/198 [02:18<01:28, 1.19s/it]\n",
|
||
|
" 63%|######3 | 125/198 [02:20<01:29, 1.23s/it]\n",
|
||
|
" 64%|######3 | 126/198 [02:20<01:15, 1.05s/it]\n",
|
||
|
" 64%|######4 | 127/198 [02:21<01:16, 1.08s/it]\n",
|
||
|
" 65%|######4 | 128/198 [02:22<01:07, 1.04it/s]\n",
|
||
|
" 65%|######5 | 129/198 [02:24<01:26, 1.26s/it]\n",
|
||
|
" 66%|######5 | 130/198 [02:25<01:13, 1.08s/it]\n",
|
||
|
" 66%|######6 | 131/198 [02:26<01:05, 1.03it/s]\n",
|
||
|
" 67%|######6 | 132/198 [02:27<01:15, 1.14s/it]\n",
|
||
|
" 67%|######7 | 133/198 [02:28<01:06, 1.02s/it]\n",
|
||
|
" 68%|######7 | 134/198 [02:28<00:58, 1.10it/s]\n",
|
||
|
" 68%|######8 | 135/198 [02:29<00:53, 1.18it/s]\n",
|
||
|
" 69%|######8 | 136/198 [02:30<01:01, 1.01it/s]\n",
|
||
|
" 69%|######9 | 137/198 [02:31<00:59, 1.03it/s]\n",
|
||
|
" 70%|######9 | 138/198 [02:33<01:07, 1.12s/it]\n",
|
||
|
" 70%|####### | 139/198 [02:34<00:59, 1.01s/it]\n",
|
||
|
" 71%|####### | 140/198 [02:35<00:57, 1.01it/s]\n",
|
||
|
" 71%|#######1 | 141/198 [02:37<01:14, 1.30s/it]\n",
|
||
|
" 72%|#######1 | 142/198 [02:39<01:31, 1.63s/it]\n",
|
||
|
" 72%|#######2 | 143/198 [02:40<01:17, 1.41s/it]\n",
|
||
|
" 73%|#######2 | 144/198 [02:41<01:07, 1.25s/it]\n",
|
||
|
" 73%|#######3 | 145/198 [02:42<01:01, 1.17s/it]\n",
|
||
|
" 74%|#######3 | 146/198 [02:42<00:53, 1.04s/it]\n",
|
||
|
" 74%|#######4 | 147/198 [02:43<00:46, 1.10it/s]\n",
|
||
|
" 75%|#######4 | 148/198 [02:44<00:45, 1.09it/s]\n",
|
||
|
" 75%|#######5 | 149/198 [02:46<01:05, 1.34s/it]\n",
|
||
|
" 76%|#######5 | 150/198 [02:47<00:58, 1.22s/it]\n",
|
||
|
" 76%|#######6 | 151/198 [02:49<01:02, 1.32s/it]\n",
|
||
|
" 77%|#######6 | 152/198 [02:51<01:15, 1.65s/it]\n",
|
||
|
" 77%|#######7 | 153/198 [02:52<01:01, 1.38s/it]\n",
|
||
|
" 78%|#######7 | 154/198 [02:53<01:00, 1.38s/it]\n",
|
||
|
" 78%|#######8 | 155/198 [02:54<00:52, 1.22s/it]\n",
|
||
|
" 79%|#######8 | 156/198 [02:55<00:49, 1.18s/it]\n",
|
||
|
" 79%|#######9 | 157/198 [02:56<00:44, 1.09s/it]\n",
|
||
|
" 80%|#######9 | 158/198 [02:57<00:40, 1.02s/it]\n",
|
||
|
" 80%|######## | 159/198 [02:58<00:40, 1.03s/it]\n",
|
||
|
" 81%|######## | 160/198 [02:59<00:41, 1.09s/it]\n",
|
||
|
" 81%|########1 | 161/198 [03:00<00:37, 1.01s/it]\n",
|
||
|
" 82%|########1 | 162/198 [03:01<00:32, 1.10it/s]\n",
|
||
|
" 82%|########2 | 163/198 [03:02<00:31, 1.12it/s]\n",
|
||
|
" 83%|########2 | 164/198 [03:03<00:32, 1.04it/s]\n",
|
||
|
" 83%|########3 | 165/198 [03:05<00:46, 1.41s/it]\n",
|
||
|
" 84%|########3 | 166/198 [03:06<00:40, 1.25s/it]\n",
|
||
|
" 84%|########4 | 167/198 [03:07<00:37, 1.22s/it]\n",
|
||
|
" 85%|########4 | 168/198 [03:09<00:37, 1.25s/it]\n",
|
||
|
" 85%|########5 | 169/198 [03:10<00:36, 1.26s/it]\n",
|
||
|
" 86%|########5 | 170/198 [03:11<00:31, 1.12s/it]\n",
|
||
|
" 86%|########6 | 171/198 [03:12<00:31, 1.18s/it]\n",
|
||
|
" 87%|########6 | 172/198 [03:13<00:31, 1.20s/it]\n",
|
||
|
" 87%|########7 | 173/198 [03:14<00:28, 1.14s/it]\n",
|
||
|
" 88%|########7 | 174/198 [03:15<00:24, 1.03s/it]\n",
|
||
|
" 88%|########8 | 175/198 [03:16<00:22, 1.04it/s]\n",
|
||
|
" 89%|########8 | 176/198 [03:17<00:22, 1.01s/it]\n",
|
||
|
" 89%|########9 | 177/198 [03:18<00:22, 1.07s/it]\n",
|
||
|
" 90%|########9 | 178/198 [03:21<00:29, 1.48s/it]\n",
|
||
|
" 90%|######### | 179/198 [03:22<00:25, 1.37s/it]\n",
|
||
|
" 91%|######### | 180/198 [03:23<00:21, 1.22s/it]\n",
|
||
|
" 91%|#########1| 181/198 [03:23<00:19, 1.13s/it]\n",
|
||
|
" 92%|#########1| 182/198 [03:25<00:17, 1.12s/it]\n",
|
||
|
" 92%|#########2| 183/198 [03:25<00:15, 1.02s/it]\n",
|
||
|
" 93%|#########2| 184/198 [03:26<00:13, 1.02it/s]\n",
|
||
|
" 93%|#########3| 185/198 [03:28<00:13, 1.08s/it]\n",
|
||
|
" 94%|#########3| 186/198 [03:28<00:11, 1.01it/s]\n",
|
||
|
" 94%|#########4| 187/198 [03:30<00:12, 1.09s/it]\n",
|
||
|
" 95%|#########4| 188/198 [03:32<00:13, 1.34s/it]\n",
|
||
|
" 95%|#########5| 189/198 [03:33<00:10, 1.22s/it]\n",
|
||
|
" 96%|#########5| 190/198 [03:33<00:08, 1.11s/it]\n",
|
||
|
" 96%|#########6| 191/198 [03:34<00:07, 1.00s/it]\n",
|
||
|
" 97%|#########6| 192/198 [03:36<00:06, 1.16s/it]\n",
|
||
|
" 97%|#########7| 193/198 [03:37<00:05, 1.12s/it]\n",
|
||
|
" 98%|#########7| 194/198 [03:38<00:04, 1.08s/it]\n",
|
||
|
" 98%|#########8| 195/198 [03:38<00:02, 1.02it/s]\n",
|
||
|
" 99%|#########8| 196/198 [03:40<00:02, 1.22s/it]\n",
|
||
|
" 99%|#########9| 197/198 [03:41<00:01, 1.16s/it]\n",
|
||
|
"100%|##########| 198/198 [03:42<00:00, 1.05s/it]02/16/2022 01:01:14 - INFO - __main__ - Epoch 0: {'accuracy': 0.938}\n",
|
||
|
"02/16/2022 01:01:37 - INFO - __main__ - Test-set evaluation: {'accuracy': 1.0}\n",
|
||
|
"Configuration saved in out/tweet/roberta_version_2\\config.json\n",
|
||
|
"Model weights saved in out/tweet/roberta_version_2\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/roberta_version_2\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/roberta_version_2\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 198/198 [04:33<00:00, 1.38s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path roberta-base \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 24 \\\n",
|
||
|
" --per_device_eval_batch_size 24 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --freeze_model \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/roberta_version_2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Roberta version 3"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 01:01:39 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/16/2022 01:01:40 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/16/2022 01:01:40 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1507.66it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\d53fc0fa09b8342651efd4073d75e19617b3e51287c2a535becda5808a8db287.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/16/2022 01:01:46 - INFO - __main__ - Return hidden states from model: False\n",
|
||
|
"02/16/2022 01:01:46 - INFO - __main__ - Using implementation from: RobertaForSequenceClassificationCustomAlternative\n",
|
||
|
"loading weights file https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7\n",
|
||
|
"Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassificationCustomAlternative: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.bias']\n",
|
||
|
"- This IS expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||
|
"- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
||
|
"Some weights of RobertaForSequenceClassificationCustomAlternative were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense_2.weight', 'classifier.dense_1_input.weight', 'classifier.dense_2.bias', 'classifier.out_proj.weight', 'classifier.dense_1_hidden.bias', 'classifier.dense_1_input.bias', 'classifier.dense_1_hidden.weight']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"02/16/2022 01:01:48 - INFO - __main__ - Freezing model weights\n",
|
||
|
"02/16/2022 01:01:48 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-ba2b749ff70d20c2.arrow\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 29.49ba/s]\n",
|
||
|
"02/16/2022 01:01:48 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-376097e0887bad71.arrow\n",
|
||
|
"02/16/2022 01:01:48 - INFO - __main__ - Sample 4466 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 8338, 365, 849, 11970, 409, 31, 3970, 727, 849, 28481, 268, 15, 849, 48056, 939, 437, 98, 1437, 1437, 849, 8656, 849, 8656, 254, 849, 45864, 849, 26949, 8585, 849, 12689, 627, 17693, 2], 'labels': 0}.\n",
|
||
|
"02/16/2022 01:01:48 - INFO - __main__ - Sample 979 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 1039, 12105, 849, 18897, 2527, 718, 5, 4117, 12, 267, 13760, 4289, 16, 7, 6876, 14, 952, 7258, 4056, 7471, 4056, 48, 405, 531, 33, 57, 16, 354, 4, 3695, 4056, 7471, 4056, 46, 1437, 952, 7258, 4056, 7471, 4056, 18164, 1437, 2], 'labels': 1}.\n",
|
||
|
"02/16/2022 01:01:48 - INFO - __main__ - Sample 2927 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 1039, 12105, 77, 16, 5, 92, 2642, 145, 703, 59, 787, 12105, 8, 110, 1108, 62, 116, 1437, 1437, 849, 17693, 1843, 10339, 4489, 849, 10120, 571, 5434, 2], 'labels': 0}.\n",
|
||
|
"02/16/2022 01:01:49 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/16/2022 01:01:49 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/16/2022 01:01:49 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/16/2022 01:01:49 - INFO - __main__ - Instantaneous batch size per device = 24\n",
|
||
|
"02/16/2022 01:01:49 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 24\n",
|
||
|
"02/16/2022 01:01:49 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/16/2022 01:01:49 - INFO - __main__ - Total optimization steps = 198\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/198 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/198 [00:00<02:57, 1.11it/s]\n",
|
||
|
" 1%|1 | 2/198 [00:02<03:38, 1.12s/it]\n",
|
||
|
" 2%|1 | 3/198 [00:03<04:04, 1.25s/it]\n",
|
||
|
" 2%|2 | 4/198 [00:04<03:42, 1.15s/it]\n",
|
||
|
" 3%|2 | 5/198 [00:05<03:13, 1.00s/it]\n",
|
||
|
" 3%|3 | 6/198 [00:06<03:48, 1.19s/it]\n",
|
||
|
" 4%|3 | 7/198 [00:07<03:25, 1.08s/it]\n",
|
||
|
" 4%|4 | 8/198 [00:09<04:20, 1.37s/it]\n",
|
||
|
" 5%|4 | 9/198 [00:12<05:25, 1.72s/it]\n",
|
||
|
" 5%|5 | 10/198 [00:13<04:31, 1.45s/it]\n",
|
||
|
" 6%|5 | 11/198 [00:14<04:18, 1.38s/it]\n",
|
||
|
" 6%|6 | 12/198 [00:15<04:15, 1.38s/it]\n",
|
||
|
" 7%|6 | 13/198 [00:17<04:30, 1.46s/it]\n",
|
||
|
" 7%|7 | 14/198 [00:18<04:13, 1.38s/it]\n",
|
||
|
" 8%|7 | 15/198 [00:19<04:11, 1.37s/it]\n",
|
||
|
" 8%|8 | 16/198 [00:21<04:22, 1.44s/it]\n",
|
||
|
" 9%|8 | 17/198 [00:22<04:27, 1.48s/it]\n",
|
||
|
" 9%|9 | 18/198 [00:23<03:47, 1.27s/it]\n",
|
||
|
" 10%|9 | 19/198 [00:25<04:00, 1.34s/it]\n",
|
||
|
" 10%|# | 20/198 [00:26<03:28, 1.17s/it]\n",
|
||
|
" 11%|# | 21/198 [00:26<03:13, 1.09s/it]\n",
|
||
|
" 11%|#1 | 22/198 [00:29<04:03, 1.38s/it]\n",
|
||
|
" 12%|#1 | 23/198 [00:29<03:29, 1.20s/it]\n",
|
||
|
" 12%|#2 | 24/198 [00:30<03:08, 1.08s/it]\n",
|
||
|
" 13%|#2 | 25/198 [00:31<03:13, 1.12s/it]\n",
|
||
|
" 13%|#3 | 26/198 [00:32<03:01, 1.05s/it]\n",
|
||
|
" 14%|#3 | 27/198 [00:34<03:15, 1.15s/it]\n",
|
||
|
" 14%|#4 | 28/198 [00:35<03:23, 1.20s/it]\n",
|
||
|
" 15%|#4 | 29/198 [00:36<03:28, 1.23s/it]\n",
|
||
|
" 15%|#5 | 30/198 [00:38<03:33, 1.27s/it]\n",
|
||
|
" 16%|#5 | 31/198 [00:39<03:36, 1.29s/it]\n",
|
||
|
" 16%|#6 | 32/198 [00:40<03:13, 1.17s/it]\n",
|
||
|
" 17%|#6 | 33/198 [00:41<03:18, 1.20s/it]\n",
|
||
|
" 17%|#7 | 34/198 [00:42<02:59, 1.09s/it]\n",
|
||
|
" 18%|#7 | 35/198 [00:43<03:17, 1.21s/it]\n",
|
||
|
" 18%|#8 | 36/198 [00:44<03:04, 1.14s/it]\n",
|
||
|
" 19%|#8 | 37/198 [00:46<03:12, 1.19s/it]\n",
|
||
|
" 19%|#9 | 38/198 [00:47<03:07, 1.17s/it]\n",
|
||
|
" 20%|#9 | 39/198 [00:48<03:12, 1.21s/it]\n",
|
||
|
" 20%|## | 40/198 [00:49<02:52, 1.09s/it]\n",
|
||
|
" 21%|## | 41/198 [00:50<02:36, 1.00it/s]\n",
|
||
|
" 21%|##1 | 42/198 [00:51<02:31, 1.03it/s]\n",
|
||
|
" 22%|##1 | 43/198 [00:52<02:31, 1.02it/s]\n",
|
||
|
" 22%|##2 | 44/198 [00:53<02:34, 1.00s/it]\n",
|
||
|
" 23%|##2 | 45/198 [00:54<02:27, 1.04it/s]\n",
|
||
|
" 23%|##3 | 46/198 [00:54<02:18, 1.10it/s]\n",
|
||
|
" 24%|##3 | 47/198 [00:55<02:12, 1.14it/s]\n",
|
||
|
" 24%|##4 | 48/198 [00:56<02:12, 1.13it/s]\n",
|
||
|
" 25%|##4 | 49/198 [00:57<02:32, 1.03s/it]\n",
|
||
|
" 25%|##5 | 50/198 [01:00<03:31, 1.43s/it]\n",
|
||
|
" 26%|##5 | 51/198 [01:01<03:22, 1.38s/it]\n",
|
||
|
" 26%|##6 | 52/198 [01:02<03:01, 1.25s/it]\n",
|
||
|
" 27%|##6 | 53/198 [01:03<02:39, 1.10s/it]\n",
|
||
|
" 27%|##7 | 54/198 [01:04<02:33, 1.06s/it]\n",
|
||
|
" 28%|##7 | 55/198 [01:05<02:33, 1.07s/it]\n",
|
||
|
" 28%|##8 | 56/198 [01:06<02:18, 1.03it/s]\n",
|
||
|
" 29%|##8 | 57/198 [01:06<02:15, 1.04it/s]\n",
|
||
|
" 29%|##9 | 58/198 [01:07<02:16, 1.02it/s]\n",
|
||
|
" 30%|##9 | 59/198 [01:08<02:06, 1.10it/s]\n",
|
||
|
" 30%|### | 60/198 [01:09<02:02, 1.13it/s]\n",
|
||
|
" 31%|### | 61/198 [01:10<02:02, 1.12it/s]\n",
|
||
|
" 31%|###1 | 62/198 [01:11<02:18, 1.02s/it]\n",
|
||
|
" 32%|###1 | 63/198 [01:12<02:16, 1.01s/it]\n",
|
||
|
" 32%|###2 | 64/198 [01:13<02:01, 1.10it/s]\n",
|
||
|
" 33%|###2 | 65/198 [01:14<02:05, 1.06it/s]\n",
|
||
|
" 33%|###3 | 66/198 [01:15<02:23, 1.09s/it]\n",
|
||
|
" 34%|###3 | 67/198 [01:16<02:08, 1.02it/s]\n",
|
||
|
" 34%|###4 | 68/198 [01:17<02:04, 1.04it/s]\n",
|
||
|
" 35%|###4 | 69/198 [01:18<02:13, 1.03s/it]\n",
|
||
|
" 35%|###5 | 70/198 [01:19<02:06, 1.01it/s]\n",
|
||
|
" 36%|###5 | 71/198 [01:20<01:56, 1.09it/s]\n",
|
||
|
" 36%|###6 | 72/198 [01:21<01:56, 1.08it/s]\n",
|
||
|
" 37%|###6 | 73/198 [01:22<02:10, 1.04s/it]\n",
|
||
|
" 37%|###7 | 74/198 [01:23<02:20, 1.13s/it]\n",
|
||
|
" 38%|###7 | 75/198 [01:24<02:11, 1.07s/it]\n",
|
||
|
" 38%|###8 | 76/198 [01:26<02:19, 1.15s/it]\n",
|
||
|
" 39%|###8 | 77/198 [01:27<02:32, 1.26s/it]\n",
|
||
|
" 39%|###9 | 78/198 [01:28<02:18, 1.15s/it]\n",
|
||
|
" 40%|###9 | 79/198 [01:29<02:09, 1.09s/it]\n",
|
||
|
" 40%|#### | 80/198 [01:30<02:00, 1.02s/it]\n",
|
||
|
" 41%|#### | 81/198 [01:31<01:51, 1.05it/s]\n",
|
||
|
" 41%|####1 | 82/198 [01:32<01:51, 1.04it/s]\n",
|
||
|
" 42%|####1 | 83/198 [01:33<01:59, 1.04s/it]\n",
|
||
|
" 42%|####2 | 84/198 [01:34<01:51, 1.03it/s]\n",
|
||
|
" 43%|####2 | 85/198 [01:35<01:53, 1.00s/it]\n",
|
||
|
" 43%|####3 | 86/198 [01:36<01:50, 1.02it/s]\n",
|
||
|
" 44%|####3 | 87/198 [01:37<01:54, 1.03s/it]\n",
|
||
|
" 44%|####4 | 88/198 [01:38<01:44, 1.05it/s]\n",
|
||
|
" 45%|####4 | 89/198 [01:39<01:46, 1.03it/s]\n",
|
||
|
" 45%|####5 | 90/198 [01:40<01:53, 1.05s/it]\n",
|
||
|
" 46%|####5 | 91/198 [01:41<02:01, 1.14s/it]\n",
|
||
|
" 46%|####6 | 92/198 [01:44<02:40, 1.52s/it]\n",
|
||
|
" 47%|####6 | 93/198 [01:46<03:02, 1.74s/it]\n",
|
||
|
" 47%|####7 | 94/198 [01:47<02:34, 1.49s/it]\n",
|
||
|
" 48%|####7 | 95/198 [01:48<02:10, 1.27s/it]\n",
|
||
|
" 48%|####8 | 96/198 [01:49<02:11, 1.29s/it]\n",
|
||
|
" 49%|####8 | 97/198 [01:50<02:05, 1.24s/it]\n",
|
||
|
" 49%|####9 | 98/198 [01:51<02:01, 1.21s/it]\n",
|
||
|
" 50%|##### | 99/198 [01:52<01:52, 1.14s/it]\n",
|
||
|
" 51%|##### | 100/198 [01:53<01:41, 1.03s/it]\n",
|
||
|
" 51%|#####1 | 101/198 [01:54<01:42, 1.05s/it]\n",
|
||
|
" 52%|#####1 | 102/198 [01:55<01:50, 1.15s/it]\n",
|
||
|
" 52%|#####2 | 103/198 [01:56<01:38, 1.04s/it]\n",
|
||
|
" 53%|#####2 | 104/198 [01:57<01:42, 1.09s/it]\n",
|
||
|
" 53%|#####3 | 105/198 [01:59<02:01, 1.31s/it]\n",
|
||
|
" 54%|#####3 | 106/198 [02:01<02:13, 1.46s/it]\n",
|
||
|
" 54%|#####4 | 107/198 [02:02<02:09, 1.42s/it]\n",
|
||
|
" 55%|#####4 | 108/198 [02:03<01:51, 1.24s/it]\n",
|
||
|
" 55%|#####5 | 109/198 [02:04<01:48, 1.22s/it]\n",
|
||
|
" 56%|#####5 | 110/198 [02:05<01:35, 1.09s/it]\n",
|
||
|
" 56%|#####6 | 111/198 [02:06<01:28, 1.02s/it]\n",
|
||
|
" 57%|#####6 | 112/198 [02:07<01:20, 1.06it/s]\n",
|
||
|
" 57%|#####7 | 113/198 [02:08<01:25, 1.00s/it]\n",
|
||
|
" 58%|#####7 | 114/198 [02:09<01:16, 1.10it/s]\n",
|
||
|
" 58%|#####8 | 115/198 [02:09<01:13, 1.13it/s]\n",
|
||
|
" 59%|#####8 | 116/198 [02:10<01:09, 1.18it/s]\n",
|
||
|
" 59%|#####9 | 117/198 [02:12<01:20, 1.00it/s]\n",
|
||
|
" 60%|#####9 | 118/198 [02:12<01:16, 1.04it/s]\n",
|
||
|
" 60%|###### | 119/198 [02:14<01:24, 1.06s/it]\n",
|
||
|
" 61%|###### | 120/198 [02:15<01:18, 1.00s/it]\n",
|
||
|
" 61%|######1 | 121/198 [02:17<01:40, 1.31s/it]\n",
|
||
|
" 62%|######1 | 122/198 [02:18<01:30, 1.19s/it]\n",
|
||
|
" 62%|######2 | 123/198 [02:19<01:28, 1.18s/it]\n",
|
||
|
" 63%|######2 | 124/198 [02:20<01:30, 1.22s/it]\n",
|
||
|
" 63%|######3 | 125/198 [02:21<01:21, 1.12s/it]\n",
|
||
|
" 64%|######3 | 126/198 [02:22<01:15, 1.05s/it]\n",
|
||
|
" 64%|######4 | 127/198 [02:23<01:16, 1.08s/it]\n",
|
||
|
" 65%|######4 | 128/198 [02:25<01:44, 1.49s/it]\n",
|
||
|
" 65%|######5 | 129/198 [02:27<01:40, 1.46s/it]\n",
|
||
|
" 66%|######5 | 130/198 [02:28<01:36, 1.42s/it]\n",
|
||
|
" 66%|######6 | 131/198 [02:29<01:26, 1.29s/it]\n",
|
||
|
" 67%|######6 | 132/198 [02:30<01:25, 1.29s/it]\n",
|
||
|
" 67%|######7 | 133/198 [02:31<01:16, 1.18s/it]\n",
|
||
|
" 68%|######7 | 134/198 [02:32<01:07, 1.06s/it]\n",
|
||
|
" 68%|######8 | 135/198 [02:34<01:17, 1.23s/it]\n",
|
||
|
" 69%|######8 | 136/198 [02:35<01:16, 1.23s/it]\n",
|
||
|
" 69%|######9 | 137/198 [02:36<01:05, 1.07s/it]\n",
|
||
|
" 70%|######9 | 138/198 [02:38<01:21, 1.36s/it]\n",
|
||
|
" 70%|####### | 139/198 [02:40<01:37, 1.66s/it]\n",
|
||
|
" 71%|####### | 140/198 [02:41<01:24, 1.45s/it]\n",
|
||
|
" 71%|#######1 | 141/198 [02:42<01:20, 1.41s/it]\n",
|
||
|
" 72%|#######1 | 142/198 [02:44<01:18, 1.40s/it]\n",
|
||
|
" 72%|#######2 | 143/198 [02:44<01:06, 1.20s/it]\n",
|
||
|
" 73%|#######2 | 144/198 [02:45<01:01, 1.13s/it]\n",
|
||
|
" 73%|#######3 | 145/198 [02:48<01:20, 1.52s/it]\n",
|
||
|
" 74%|#######3 | 146/198 [02:49<01:21, 1.57s/it]\n",
|
||
|
" 74%|#######4 | 147/198 [02:51<01:16, 1.50s/it]\n",
|
||
|
" 75%|#######4 | 148/198 [02:52<01:12, 1.45s/it]\n",
|
||
|
" 75%|#######5 | 149/198 [02:53<01:03, 1.29s/it]\n",
|
||
|
" 76%|#######5 | 150/198 [02:54<01:02, 1.30s/it]\n",
|
||
|
" 76%|#######6 | 151/198 [02:55<00:56, 1.20s/it]\n",
|
||
|
" 77%|#######6 | 152/198 [02:56<00:49, 1.08s/it]\n",
|
||
|
" 77%|#######7 | 153/198 [02:58<00:55, 1.23s/it]\n",
|
||
|
" 78%|#######7 | 154/198 [02:59<00:51, 1.16s/it]\n",
|
||
|
" 78%|#######8 | 155/198 [03:00<00:45, 1.06s/it]\n",
|
||
|
" 79%|#######8 | 156/198 [03:00<00:41, 1.02it/s]\n",
|
||
|
" 79%|#######9 | 157/198 [03:01<00:41, 1.02s/it]\n",
|
||
|
" 80%|#######9 | 158/198 [03:03<00:40, 1.02s/it]\n",
|
||
|
" 80%|######## | 159/198 [03:03<00:37, 1.05it/s]\n",
|
||
|
" 81%|######## | 160/198 [03:05<00:39, 1.05s/it]\n",
|
||
|
" 81%|########1 | 161/198 [03:06<00:41, 1.13s/it]\n",
|
||
|
" 82%|########1 | 162/198 [03:07<00:36, 1.00s/it]\n",
|
||
|
" 82%|########2 | 163/198 [03:08<00:38, 1.10s/it]\n",
|
||
|
" 83%|########2 | 164/198 [03:09<00:36, 1.08s/it]\n",
|
||
|
" 83%|########3 | 165/198 [03:10<00:34, 1.03s/it]\n",
|
||
|
" 84%|########3 | 166/198 [03:11<00:32, 1.03s/it]\n",
|
||
|
" 84%|########4 | 167/198 [03:12<00:29, 1.04it/s]\n",
|
||
|
" 85%|########4 | 168/198 [03:13<00:29, 1.02it/s]\n",
|
||
|
" 85%|########5 | 169/198 [03:15<00:36, 1.25s/it]\n",
|
||
|
" 86%|########5 | 170/198 [03:16<00:35, 1.25s/it]\n",
|
||
|
" 86%|########6 | 171/198 [03:17<00:31, 1.18s/it]\n",
|
||
|
" 87%|########6 | 172/198 [03:18<00:29, 1.13s/it]\n",
|
||
|
" 87%|########7 | 173/198 [03:19<00:27, 1.09s/it]\n",
|
||
|
" 88%|########7 | 174/198 [03:20<00:25, 1.05s/it]\n",
|
||
|
" 88%|########8 | 175/198 [03:21<00:26, 1.16s/it]\n",
|
||
|
" 89%|########8 | 176/198 [03:23<00:26, 1.20s/it]\n",
|
||
|
" 89%|########9 | 177/198 [03:24<00:25, 1.20s/it]\n",
|
||
|
" 90%|########9 | 178/198 [03:26<00:30, 1.55s/it]\n",
|
||
|
" 90%|######### | 179/198 [03:27<00:27, 1.46s/it]\n",
|
||
|
" 91%|######### | 180/198 [03:28<00:22, 1.26s/it]\n",
|
||
|
" 91%|#########1| 181/198 [03:31<00:27, 1.63s/it]\n",
|
||
|
" 92%|#########1| 182/198 [03:31<00:22, 1.38s/it]\n",
|
||
|
" 92%|#########2| 183/198 [03:33<00:19, 1.28s/it]\n",
|
||
|
" 93%|#########2| 184/198 [03:34<00:18, 1.29s/it]\n",
|
||
|
" 93%|#########3| 185/198 [03:35<00:17, 1.37s/it]\n",
|
||
|
" 94%|#########3| 186/198 [03:37<00:15, 1.30s/it]\n",
|
||
|
" 94%|#########4| 187/198 [03:38<00:14, 1.31s/it]\n",
|
||
|
" 95%|#########4| 188/198 [03:39<00:11, 1.15s/it]\n",
|
||
|
" 95%|#########5| 189/198 [03:41<00:13, 1.49s/it]\n",
|
||
|
" 96%|#########5| 190/198 [03:42<00:10, 1.27s/it]\n",
|
||
|
" 96%|#########6| 191/198 [03:43<00:08, 1.27s/it]\n",
|
||
|
" 97%|#########6| 192/198 [03:44<00:07, 1.22s/it]\n",
|
||
|
" 97%|#########7| 193/198 [03:45<00:05, 1.10s/it]\n",
|
||
|
" 98%|#########7| 194/198 [03:46<00:03, 1.01it/s]\n",
|
||
|
" 98%|#########8| 195/198 [03:47<00:02, 1.03it/s]\n",
|
||
|
" 99%|#########8| 196/198 [03:47<00:01, 1.11it/s]\n",
|
||
|
" 99%|#########9| 197/198 [03:48<00:00, 1.01it/s]\n",
|
||
|
"100%|##########| 198/198 [03:49<00:00, 1.19it/s]02/16/2022 01:06:06 - INFO - __main__ - Epoch 0: {'accuracy': 0.938}\n",
|
||
|
"02/16/2022 01:06:29 - INFO - __main__ - Test-set evaluation: {'accuracy': 1.0}\n",
|
||
|
"Configuration saved in out/tweet/roberta_version_3\\config.json\n",
|
||
|
"Model weights saved in out/tweet/roberta_version_3\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/roberta_version_3\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/roberta_version_3\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 198/198 [04:40<00:00, 1.42s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path roberta-base \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 24 \\\n",
|
||
|
" --per_device_eval_batch_size 24 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --freeze_model \\\n",
|
||
|
" --custom_model \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/roberta_version_3"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Roberta version 4"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 01:06:31 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/16/2022 01:06:32 - WARNING - datasets.builder - Using custom data configuration default-67c9d932a627b7b8\n",
|
||
|
"02/16/2022 01:06:32 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1507.84it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/vocab.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\d3ccdbfeb9aaa747ef20432d4976c32ee3fa69663b379deb253ccfce2bb1fdc5.d67d6b367eb24ab43b08ad55e014cf254076934f71d832bbab9ad35644a375ab\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/merges.txt from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\cafdecc90fcab17011e12ac813dd574b4b3fea39da6dd817813efa010262ff3f.5d12962c5ee615a4c803841266e9c3be9a691a924f72d395d3a6c6c81157788b\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\d53fc0fa09b8342651efd4073d75e19617b3e51287c2a535becda5808a8db287.fc9576039592f026ad76a1c231b89aee8668488c671dfbe6616bab2ed298d730\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/roberta-base/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/roberta-base/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\733bade19e5f0ce98e6531021dd5180994bb2f7b8bd7e80c7968805834ba351e.35205c6cfc956461d8515139f0f8dd5d207a2f336c0c3a83b4bc8dca3518e37b\n",
|
||
|
"Model config RobertaConfig {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForMaskedLM\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"02/16/2022 01:06:38 - INFO - __main__ - Return hidden states from model: True\n",
|
||
|
"02/16/2022 01:06:38 - INFO - __main__ - Using implementation from: RobertaForSequenceClassificationCustomAlternative\n",
|
||
|
"loading weights file https://huggingface.co/roberta-base/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\51ba668f7ff34e7cdfa9561e8361747738113878850a7d717dbc69de8683aaad.c7efaa30a0d80b2958b876969faa180e485944a849deee4ad482332de65365a7\n",
|
||
|
"Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassificationCustomAlternative: ['lm_head.decoder.weight', 'roberta.pooler.dense.bias', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight']\n",
|
||
|
"- This IS expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||
|
"- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
||
|
"Some weights of RobertaForSequenceClassificationCustomAlternative were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense_1_hidden.weight', 'classifier.dense_1_hidden.bias', 'classifier.dense_1_input.bias', 'classifier.dense_2.bias', 'classifier.dense_2.weight', 'classifier.out_proj.weight', 'classifier.dense_1_input.weight', 'classifier.out_proj.bias']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"02/16/2022 01:06:40 - INFO - __main__ - Freezing model weights\n",
|
||
|
"02/16/2022 01:06:40 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-73165df4ba3ef6cf.arrow\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 31.33ba/s]\n",
|
||
|
"02/16/2022 01:06:40 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-67c9d932a627b7b8\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-015ce493f6b049f3.arrow\n",
|
||
|
"02/16/2022 01:06:40 - INFO - __main__ - Sample 3979 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 1039, 12105, 787, 12105, 787, 12105, 787, 12105, 787, 12105, 45365, 5, 2526, 9, 84, 184, 1269, 4, 1437, 1437, 2], 'labels': 0}.\n",
|
||
|
"02/16/2022 01:06:40 - INFO - __main__ - Sample 2415 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 10669, 99, 84, 247, 439, 149, 42, 94, 76, 7, 192, 82, 836, 10, 22, 27076, 113, 7, 5, 4773, 359, 3914, 131, 283, 259, 13, 960, 53, 1037, 1437, 1437, 2], 'labels': 0}.\n",
|
||
|
"02/16/2022 01:06:40 - INFO - __main__ - Sample 2136 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [0, 1039, 12105, 849, 41468, 1809, 4473, 20126, 849, 41468, 1809, 4742, 21929, 1809, 849, 41468, 1809, 119, 1350, 90, 428, 4759, 415, 596, 1437, 849, 31336, 28465, 16, 8266, 1437, 787, 12105, 2], 'labels': 1}.\n",
|
||
|
"02/16/2022 01:06:41 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/16/2022 01:06:41 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/16/2022 01:06:41 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/16/2022 01:06:41 - INFO - __main__ - Instantaneous batch size per device = 24\n",
|
||
|
"02/16/2022 01:06:41 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 24\n",
|
||
|
"02/16/2022 01:06:41 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/16/2022 01:06:41 - INFO - __main__ - Total optimization steps = 198\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/198 [00:00<?, ?it/s]\n",
|
||
|
" 1%| | 1/198 [00:02<07:48, 2.38s/it]\n",
|
||
|
" 1%|1 | 2/198 [00:04<07:47, 2.39s/it]\n",
|
||
|
" 2%|1 | 3/198 [00:06<06:30, 2.00s/it]\n",
|
||
|
" 2%|2 | 4/198 [00:07<04:55, 1.52s/it]\n",
|
||
|
" 3%|2 | 5/198 [00:08<04:20, 1.35s/it]\n",
|
||
|
" 3%|3 | 6/198 [00:09<04:16, 1.34s/it]\n",
|
||
|
" 4%|3 | 7/198 [00:10<04:14, 1.33s/it]\n",
|
||
|
" 4%|4 | 8/198 [00:12<04:12, 1.33s/it]\n",
|
||
|
" 5%|4 | 9/198 [00:13<03:49, 1.22s/it]\n",
|
||
|
" 5%|5 | 10/198 [00:14<03:40, 1.17s/it]\n",
|
||
|
" 6%|5 | 11/198 [00:15<03:24, 1.09s/it]\n",
|
||
|
" 6%|6 | 12/198 [00:15<03:07, 1.01s/it]\n",
|
||
|
" 7%|6 | 13/198 [00:16<03:09, 1.02s/it]\n",
|
||
|
" 7%|7 | 14/198 [00:17<03:03, 1.00it/s]\n",
|
||
|
" 8%|7 | 15/198 [00:18<02:56, 1.04it/s]\n",
|
||
|
" 8%|8 | 16/198 [00:19<02:49, 1.08it/s]\n",
|
||
|
" 9%|8 | 17/198 [00:20<02:56, 1.03it/s]\n",
|
||
|
" 9%|9 | 18/198 [00:21<02:47, 1.07it/s]\n",
|
||
|
" 10%|9 | 19/198 [00:22<02:53, 1.03it/s]\n",
|
||
|
" 10%|# | 20/198 [00:24<03:27, 1.16s/it]\n",
|
||
|
" 11%|# | 21/198 [00:24<03:01, 1.03s/it]\n",
|
||
|
" 11%|#1 | 22/198 [00:26<03:34, 1.22s/it]\n",
|
||
|
" 12%|#1 | 23/198 [00:27<03:20, 1.14s/it]\n",
|
||
|
" 12%|#2 | 24/198 [00:28<03:08, 1.08s/it]\n",
|
||
|
" 13%|#2 | 25/198 [00:29<03:17, 1.14s/it]\n",
|
||
|
" 13%|#3 | 26/198 [00:31<03:34, 1.24s/it]\n",
|
||
|
" 14%|#3 | 27/198 [00:32<03:38, 1.28s/it]\n",
|
||
|
" 14%|#4 | 28/198 [00:33<03:16, 1.16s/it]\n",
|
||
|
" 15%|#4 | 29/198 [00:34<03:02, 1.08s/it]\n",
|
||
|
" 15%|#5 | 30/198 [00:35<02:55, 1.05s/it]\n",
|
||
|
" 16%|#5 | 31/198 [00:36<02:43, 1.02it/s]\n",
|
||
|
" 16%|#6 | 32/198 [00:37<02:48, 1.01s/it]\n",
|
||
|
" 17%|#6 | 33/198 [00:38<02:51, 1.04s/it]\n",
|
||
|
" 17%|#7 | 34/198 [00:39<02:37, 1.04it/s]\n",
|
||
|
" 18%|#7 | 35/198 [00:39<02:29, 1.09it/s]\n",
|
||
|
" 18%|#8 | 36/198 [00:41<03:20, 1.23s/it]\n",
|
||
|
" 19%|#8 | 37/198 [00:44<04:16, 1.60s/it]\n",
|
||
|
" 19%|#9 | 38/198 [00:45<03:42, 1.39s/it]\n",
|
||
|
" 20%|#9 | 39/198 [00:46<03:35, 1.36s/it]\n",
|
||
|
" 20%|## | 40/198 [00:48<03:39, 1.39s/it]\n",
|
||
|
" 21%|## | 41/198 [00:48<03:16, 1.25s/it]\n",
|
||
|
" 21%|##1 | 42/198 [00:50<03:28, 1.34s/it]\n",
|
||
|
" 22%|##1 | 43/198 [00:52<03:57, 1.53s/it]\n",
|
||
|
" 22%|##2 | 44/198 [00:53<03:30, 1.37s/it]\n",
|
||
|
" 23%|##2 | 45/198 [00:54<03:05, 1.21s/it]\n",
|
||
|
" 23%|##3 | 46/198 [00:55<02:46, 1.09s/it]\n",
|
||
|
" 24%|##3 | 47/198 [00:55<02:30, 1.00it/s]\n",
|
||
|
" 24%|##4 | 48/198 [00:56<02:20, 1.07it/s]\n",
|
||
|
" 25%|##4 | 49/198 [00:57<02:11, 1.13it/s]\n",
|
||
|
" 25%|##5 | 50/198 [00:58<02:15, 1.09it/s]\n",
|
||
|
" 26%|##5 | 51/198 [00:59<02:09, 1.13it/s]\n",
|
||
|
" 26%|##6 | 52/198 [01:00<02:24, 1.01it/s]\n",
|
||
|
" 27%|##6 | 53/198 [01:01<02:11, 1.10it/s]\n",
|
||
|
" 27%|##7 | 54/198 [01:02<02:15, 1.06it/s]\n",
|
||
|
" 28%|##7 | 55/198 [01:03<02:28, 1.04s/it]\n",
|
||
|
" 28%|##8 | 56/198 [01:04<02:14, 1.06it/s]\n",
|
||
|
" 29%|##8 | 57/198 [01:05<02:30, 1.07s/it]\n",
|
||
|
" 29%|##9 | 58/198 [01:06<02:24, 1.03s/it]\n",
|
||
|
" 30%|##9 | 59/198 [01:07<02:35, 1.12s/it]\n",
|
||
|
" 30%|### | 60/198 [01:08<02:27, 1.07s/it]\n",
|
||
|
" 31%|### | 61/198 [01:10<02:35, 1.14s/it]\n",
|
||
|
" 31%|###1 | 62/198 [01:11<02:43, 1.20s/it]\n",
|
||
|
" 32%|###1 | 63/198 [01:12<02:48, 1.25s/it]\n",
|
||
|
" 32%|###2 | 64/198 [01:13<02:36, 1.17s/it]\n",
|
||
|
" 33%|###2 | 65/198 [01:14<02:15, 1.02s/it]\n",
|
||
|
" 33%|###3 | 66/198 [01:16<02:40, 1.22s/it]\n",
|
||
|
" 34%|###3 | 67/198 [01:18<03:20, 1.53s/it]\n",
|
||
|
" 34%|###4 | 68/198 [01:19<02:54, 1.35s/it]\n",
|
||
|
" 35%|###4 | 69/198 [01:20<03:02, 1.41s/it]\n",
|
||
|
" 35%|###5 | 70/198 [01:21<02:44, 1.29s/it]\n",
|
||
|
" 36%|###5 | 71/198 [01:22<02:27, 1.16s/it]\n",
|
||
|
" 36%|###6 | 72/198 [01:23<02:23, 1.14s/it]\n",
|
||
|
" 37%|###6 | 73/198 [01:25<02:25, 1.16s/it]\n",
|
||
|
" 37%|###7 | 74/198 [01:26<02:32, 1.23s/it]\n",
|
||
|
" 38%|###7 | 75/198 [01:27<02:15, 1.10s/it]\n",
|
||
|
" 38%|###8 | 76/198 [01:29<03:03, 1.50s/it]\n",
|
||
|
" 39%|###8 | 77/198 [01:30<02:47, 1.39s/it]\n",
|
||
|
" 39%|###9 | 78/198 [01:31<02:28, 1.24s/it]\n",
|
||
|
" 40%|###9 | 79/198 [01:32<02:12, 1.11s/it]\n",
|
||
|
" 40%|#### | 80/198 [01:33<02:08, 1.09s/it]\n",
|
||
|
" 41%|#### | 81/198 [01:34<01:56, 1.01it/s]\n",
|
||
|
" 41%|####1 | 82/198 [01:35<01:46, 1.09it/s]\n",
|
||
|
" 42%|####1 | 83/198 [01:36<01:52, 1.02it/s]\n",
|
||
|
" 42%|####2 | 84/198 [01:36<01:46, 1.07it/s]\n",
|
||
|
" 43%|####2 | 85/198 [01:37<01:41, 1.11it/s]\n",
|
||
|
" 43%|####3 | 86/198 [01:38<01:38, 1.14it/s]\n",
|
||
|
" 44%|####3 | 87/198 [01:41<02:30, 1.35s/it]\n",
|
||
|
" 44%|####4 | 88/198 [01:42<02:35, 1.41s/it]\n",
|
||
|
" 45%|####4 | 89/198 [01:43<02:15, 1.25s/it]\n",
|
||
|
" 45%|####5 | 90/198 [01:44<01:59, 1.10s/it]\n",
|
||
|
" 46%|####5 | 91/198 [01:46<02:41, 1.51s/it]\n",
|
||
|
" 46%|####6 | 92/198 [01:47<02:21, 1.34s/it]\n",
|
||
|
" 47%|####6 | 93/198 [01:48<02:09, 1.23s/it]\n",
|
||
|
" 47%|####7 | 94/198 [01:49<02:01, 1.17s/it]\n",
|
||
|
" 48%|####7 | 95/198 [01:50<01:52, 1.09s/it]\n",
|
||
|
" 48%|####8 | 96/198 [01:52<02:05, 1.23s/it]\n",
|
||
|
" 49%|####8 | 97/198 [01:52<01:51, 1.10s/it]\n",
|
||
|
" 49%|####9 | 98/198 [01:53<01:41, 1.02s/it]\n",
|
||
|
" 50%|##### | 99/198 [01:54<01:38, 1.01it/s]\n",
|
||
|
" 51%|##### | 100/198 [01:55<01:35, 1.03it/s]\n",
|
||
|
" 51%|#####1 | 101/198 [01:56<01:45, 1.09s/it]\n",
|
||
|
" 52%|#####1 | 102/198 [01:58<01:47, 1.12s/it]\n",
|
||
|
" 52%|#####2 | 103/198 [01:58<01:38, 1.03s/it]\n",
|
||
|
" 53%|#####2 | 104/198 [02:00<01:45, 1.12s/it]\n",
|
||
|
" 53%|#####3 | 105/198 [02:01<01:54, 1.23s/it]\n",
|
||
|
" 54%|#####3 | 106/198 [02:02<01:48, 1.18s/it]\n",
|
||
|
" 54%|#####4 | 107/198 [02:03<01:41, 1.12s/it]\n",
|
||
|
" 55%|#####4 | 108/198 [02:04<01:30, 1.00s/it]\n",
|
||
|
" 55%|#####5 | 109/198 [02:05<01:26, 1.03it/s]\n",
|
||
|
" 56%|#####5 | 110/198 [02:06<01:34, 1.07s/it]\n",
|
||
|
" 56%|#####6 | 111/198 [02:07<01:33, 1.08s/it]\n",
|
||
|
" 57%|#####6 | 112/198 [02:08<01:33, 1.09s/it]\n",
|
||
|
" 57%|#####7 | 113/198 [02:09<01:26, 1.02s/it]\n",
|
||
|
" 58%|#####7 | 114/198 [02:11<01:34, 1.12s/it]\n",
|
||
|
" 58%|#####8 | 115/198 [02:11<01:25, 1.03s/it]\n",
|
||
|
" 59%|#####8 | 116/198 [02:13<01:31, 1.12s/it]\n",
|
||
|
" 59%|#####9 | 117/198 [02:14<01:34, 1.16s/it]\n",
|
||
|
" 60%|#####9 | 118/198 [02:15<01:28, 1.11s/it]\n",
|
||
|
" 60%|###### | 119/198 [02:16<01:31, 1.16s/it]\n",
|
||
|
" 61%|###### | 120/198 [02:18<01:34, 1.21s/it]\n",
|
||
|
" 61%|######1 | 121/198 [02:19<01:26, 1.12s/it]\n",
|
||
|
" 62%|######1 | 122/198 [02:20<01:22, 1.09s/it]\n",
|
||
|
" 62%|######2 | 123/198 [02:21<01:26, 1.15s/it]\n",
|
||
|
" 63%|######2 | 124/198 [02:22<01:28, 1.20s/it]\n",
|
||
|
" 63%|######3 | 125/198 [02:25<01:55, 1.59s/it]\n",
|
||
|
" 64%|######3 | 126/198 [02:27<02:00, 1.67s/it]\n",
|
||
|
" 64%|######4 | 127/198 [02:27<01:37, 1.38s/it]\n",
|
||
|
" 65%|######4 | 128/198 [02:29<01:35, 1.37s/it]\n",
|
||
|
" 65%|######5 | 129/198 [02:30<01:33, 1.35s/it]\n",
|
||
|
" 66%|######5 | 130/198 [02:31<01:21, 1.21s/it]\n",
|
||
|
" 66%|######6 | 131/198 [02:32<01:11, 1.06s/it]\n",
|
||
|
" 67%|######6 | 132/198 [02:32<01:07, 1.02s/it]\n",
|
||
|
" 67%|######7 | 133/198 [02:34<01:16, 1.17s/it]\n",
|
||
|
" 68%|######7 | 134/198 [02:35<01:13, 1.14s/it]\n",
|
||
|
" 68%|######8 | 135/198 [02:37<01:28, 1.40s/it]\n",
|
||
|
" 69%|######8 | 136/198 [02:38<01:15, 1.22s/it]\n",
|
||
|
" 69%|######9 | 137/198 [02:39<01:17, 1.27s/it]\n",
|
||
|
" 70%|######9 | 138/198 [02:40<01:14, 1.24s/it]\n",
|
||
|
" 70%|####### | 139/198 [02:42<01:13, 1.25s/it]\n",
|
||
|
" 71%|####### | 140/198 [02:44<01:31, 1.58s/it]\n",
|
||
|
" 71%|#######1 | 141/198 [02:45<01:15, 1.33s/it]\n",
|
||
|
" 72%|#######1 | 142/198 [02:46<01:11, 1.28s/it]\n",
|
||
|
" 72%|#######2 | 143/198 [02:48<01:28, 1.60s/it]\n",
|
||
|
" 73%|#######2 | 144/198 [02:49<01:16, 1.42s/it]\n",
|
||
|
" 73%|#######3 | 145/198 [02:51<01:14, 1.40s/it]\n",
|
||
|
" 74%|#######3 | 146/198 [02:51<01:03, 1.23s/it]\n",
|
||
|
" 74%|#######4 | 147/198 [02:52<00:55, 1.09s/it]\n",
|
||
|
" 75%|#######4 | 148/198 [02:53<00:51, 1.02s/it]\n",
|
||
|
" 75%|#######5 | 149/198 [02:54<00:49, 1.01s/it]\n",
|
||
|
" 76%|#######5 | 150/198 [02:55<00:46, 1.04it/s]\n",
|
||
|
" 76%|#######6 | 151/198 [02:56<00:44, 1.07it/s]\n",
|
||
|
" 77%|#######6 | 152/198 [02:57<00:48, 1.05s/it]\n",
|
||
|
" 77%|#######7 | 153/198 [02:58<00:49, 1.11s/it]\n",
|
||
|
" 78%|#######7 | 154/198 [02:59<00:44, 1.01s/it]\n",
|
||
|
" 78%|#######8 | 155/198 [03:00<00:41, 1.04it/s]\n",
|
||
|
" 79%|#######8 | 156/198 [03:01<00:44, 1.06s/it]\n",
|
||
|
" 79%|#######9 | 157/198 [03:02<00:40, 1.02it/s]\n",
|
||
|
" 80%|#######9 | 158/198 [03:03<00:40, 1.01s/it]\n",
|
||
|
" 80%|######## | 159/198 [03:04<00:43, 1.10s/it]\n",
|
||
|
" 81%|######## | 160/198 [03:06<00:42, 1.11s/it]\n",
|
||
|
" 81%|########1 | 161/198 [03:07<00:38, 1.04s/it]\n",
|
||
|
" 82%|########1 | 162/198 [03:08<00:38, 1.06s/it]\n",
|
||
|
" 82%|########2 | 163/198 [03:09<00:45, 1.31s/it]\n",
|
||
|
" 83%|########2 | 164/198 [03:10<00:40, 1.19s/it]\n",
|
||
|
" 83%|########3 | 165/198 [03:11<00:36, 1.10s/it]\n",
|
||
|
" 84%|########3 | 166/198 [03:12<00:32, 1.01s/it]\n",
|
||
|
" 84%|########4 | 167/198 [03:13<00:33, 1.07s/it]\n",
|
||
|
" 85%|########4 | 168/198 [03:15<00:34, 1.14s/it]\n",
|
||
|
" 85%|########5 | 169/198 [03:16<00:31, 1.10s/it]\n",
|
||
|
" 86%|########5 | 170/198 [03:17<00:34, 1.22s/it]\n",
|
||
|
" 86%|########6 | 171/198 [03:18<00:33, 1.25s/it]\n",
|
||
|
" 87%|########6 | 172/198 [03:19<00:30, 1.16s/it]\n",
|
||
|
" 87%|########7 | 173/198 [03:21<00:35, 1.43s/it]\n",
|
||
|
" 88%|########7 | 174/198 [03:23<00:33, 1.39s/it]\n",
|
||
|
" 88%|########8 | 175/198 [03:24<00:33, 1.46s/it]\n",
|
||
|
" 89%|########8 | 176/198 [03:26<00:31, 1.41s/it]\n",
|
||
|
" 89%|########9 | 177/198 [03:27<00:29, 1.40s/it]\n",
|
||
|
" 90%|########9 | 178/198 [03:28<00:27, 1.37s/it]\n",
|
||
|
" 90%|######### | 179/198 [03:30<00:25, 1.32s/it]\n",
|
||
|
" 91%|######### | 180/198 [03:30<00:21, 1.17s/it]\n",
|
||
|
" 91%|#########1| 181/198 [03:31<00:18, 1.10s/it]\n",
|
||
|
" 92%|#########1| 182/198 [03:33<00:18, 1.13s/it]\n",
|
||
|
" 92%|#########2| 183/198 [03:33<00:15, 1.05s/it]\n",
|
||
|
" 93%|#########2| 184/198 [03:35<00:16, 1.15s/it]\n",
|
||
|
" 93%|#########3| 185/198 [03:36<00:13, 1.06s/it]\n",
|
||
|
" 94%|#########3| 186/198 [03:37<00:13, 1.15s/it]\n",
|
||
|
" 94%|#########4| 187/198 [03:38<00:11, 1.03s/it]\n",
|
||
|
" 95%|#########4| 188/198 [03:39<00:10, 1.03s/it]\n",
|
||
|
" 95%|#########5| 189/198 [03:40<00:09, 1.02s/it]\n",
|
||
|
" 96%|#########5| 190/198 [03:41<00:08, 1.10s/it]\n",
|
||
|
" 96%|#########6| 191/198 [03:42<00:08, 1.15s/it]\n",
|
||
|
" 97%|#########6| 192/198 [03:44<00:07, 1.21s/it]\n",
|
||
|
" 97%|#########7| 193/198 [03:45<00:05, 1.18s/it]\n",
|
||
|
" 98%|#########7| 194/198 [03:46<00:04, 1.08s/it]\n",
|
||
|
" 98%|#########8| 195/198 [03:46<00:02, 1.00it/s]\n",
|
||
|
" 99%|#########8| 196/198 [03:47<00:01, 1.04it/s]\n",
|
||
|
" 99%|#########9| 197/198 [03:48<00:00, 1.05it/s]\n",
|
||
|
"100%|##########| 198/198 [03:49<00:00, 1.27it/s]02/16/2022 01:10:58 - INFO - __main__ - Epoch 0: {'accuracy': 0.938}\n",
|
||
|
"02/16/2022 01:11:22 - INFO - __main__ - Test-set evaluation: {'accuracy': 1.0}\n",
|
||
|
"Configuration saved in out/tweet/roberta_version_4\\config.json\n",
|
||
|
"Model weights saved in out/tweet/roberta_version_4\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/roberta_version_4\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/roberta_version_4\\special_tokens_map.json\n",
|
||
|
"\n",
|
||
|
"100%|##########| 198/198 [04:40<00:00, 1.42s/it]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_glue_no_trainer.py \\\n",
|
||
|
" --model_name_or_path roberta-base \\\n",
|
||
|
" --train_file data/train.json \\\n",
|
||
|
" --validation_file data/valid.json \\\n",
|
||
|
" --test_file data/test.json \\\n",
|
||
|
" --per_device_train_batch_size 24 \\\n",
|
||
|
" --per_device_eval_batch_size 24 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --freeze_model \\\n",
|
||
|
" --custom_model \\\n",
|
||
|
" --return_hidden_states \\\n",
|
||
|
" --learning_rate 2e-5 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/roberta_version_4"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# T5"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 16,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/17/2022 17:13:52 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/17/2022 17:13:53 - WARNING - datasets.builder - Using custom data configuration default-c1907d9305fb2fbb\n",
|
||
|
"02/17/2022 17:13:53 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-c1907d9305fb2fbb\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 143.23it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
|
||
|
"Model config T5Config {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"T5WithLMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"d_ff\": 2048,\n",
|
||
|
" \"d_kv\": 64,\n",
|
||
|
" \"d_model\": 512,\n",
|
||
|
" \"decoder_start_token_id\": 0,\n",
|
||
|
" \"dropout_rate\": 0.1,\n",
|
||
|
" \"eos_token_id\": 1,\n",
|
||
|
" \"feed_forward_proj\": \"relu\",\n",
|
||
|
" \"initializer_factor\": 1.0,\n",
|
||
|
" \"is_encoder_decoder\": true,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-06,\n",
|
||
|
" \"model_type\": \"t5\",\n",
|
||
|
" \"n_positions\": 512,\n",
|
||
|
" \"num_decoder_layers\": 6,\n",
|
||
|
" \"num_heads\": 8,\n",
|
||
|
" \"num_layers\": 6,\n",
|
||
|
" \"output_past\": true,\n",
|
||
|
" \"pad_token_id\": 0,\n",
|
||
|
" \"relative_attention_num_buckets\": 32,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"summarization\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"length_penalty\": 2.0,\n",
|
||
|
" \"max_length\": 200,\n",
|
||
|
" \"min_length\": 30,\n",
|
||
|
" \"no_repeat_ngram_size\": 3,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"summarize: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_de\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to German: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_fr\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to French: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_ro\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to Romanian: \"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 32128\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
|
||
|
"Model config T5Config {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"T5WithLMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"d_ff\": 2048,\n",
|
||
|
" \"d_kv\": 64,\n",
|
||
|
" \"d_model\": 512,\n",
|
||
|
" \"decoder_start_token_id\": 0,\n",
|
||
|
" \"dropout_rate\": 0.1,\n",
|
||
|
" \"eos_token_id\": 1,\n",
|
||
|
" \"feed_forward_proj\": \"relu\",\n",
|
||
|
" \"initializer_factor\": 1.0,\n",
|
||
|
" \"is_encoder_decoder\": true,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-06,\n",
|
||
|
" \"model_type\": \"t5\",\n",
|
||
|
" \"n_positions\": 512,\n",
|
||
|
" \"num_decoder_layers\": 6,\n",
|
||
|
" \"num_heads\": 8,\n",
|
||
|
" \"num_layers\": 6,\n",
|
||
|
" \"output_past\": true,\n",
|
||
|
" \"pad_token_id\": 0,\n",
|
||
|
" \"relative_attention_num_buckets\": 32,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"summarization\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"length_penalty\": 2.0,\n",
|
||
|
" \"max_length\": 200,\n",
|
||
|
" \"min_length\": 30,\n",
|
||
|
" \"no_repeat_ngram_size\": 3,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"summarize: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_de\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to German: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_fr\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to French: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_ro\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to Romanian: \"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 32128\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/spiece.model from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\65fc04e21f45f61430aea0c4fedffac16a4d20d78b8e6601d8d996ebefefecd2.3b69006860e7b5d0a63ffdddc01ddcd6b7c318a6f4fd793596552c741734c62d\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\06779097c78e12f47ef67ecb728810c2ae757ee0a9efe9390c6419783d99382d.8627f1bd5d270a9fd2e5a51c8bec3223896587cc3cfe13edeabb0992ab43c529\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
|
||
|
"Model config T5Config {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"T5WithLMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"d_ff\": 2048,\n",
|
||
|
" \"d_kv\": 64,\n",
|
||
|
" \"d_model\": 512,\n",
|
||
|
" \"decoder_start_token_id\": 0,\n",
|
||
|
" \"dropout_rate\": 0.1,\n",
|
||
|
" \"eos_token_id\": 1,\n",
|
||
|
" \"feed_forward_proj\": \"relu\",\n",
|
||
|
" \"initializer_factor\": 1.0,\n",
|
||
|
" \"is_encoder_decoder\": true,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-06,\n",
|
||
|
" \"model_type\": \"t5\",\n",
|
||
|
" \"n_positions\": 512,\n",
|
||
|
" \"num_decoder_layers\": 6,\n",
|
||
|
" \"num_heads\": 8,\n",
|
||
|
" \"num_layers\": 6,\n",
|
||
|
" \"output_past\": true,\n",
|
||
|
" \"pad_token_id\": 0,\n",
|
||
|
" \"relative_attention_num_buckets\": 32,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"summarization\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"length_penalty\": 2.0,\n",
|
||
|
" \"max_length\": 200,\n",
|
||
|
" \"min_length\": 30,\n",
|
||
|
" \"no_repeat_ngram_size\": 3,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"summarize: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_de\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to German: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_fr\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to French: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_ro\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to Romanian: \"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 32128\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading weights file https://huggingface.co/t5-small/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fee5a3a0ae379232608b6eed45d2d7a0d2966b9683728838412caccc41b4b0ed.ddacdc89ec88482db20c676f0861a336f3d0409f94748c209847b49529d73885\n",
|
||
|
"All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
|
||
|
"\n",
|
||
|
"All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at t5-small.\n",
|
||
|
"If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
|
||
|
"02/17/2022 17:14:00 - INFO - __main__ - Using translation prefix: \"tweet classification: \"\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/5 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 60%|###### | 3/5 [00:00<00:00, 28.92ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 5/5 [00:00<00:00, 32.34ba/s]\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 66.84ba/s]\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 77.13ba/s]\n",
|
||
|
"02/17/2022 17:14:00 - INFO - __main__ - Sample 2469 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [10657, 13774, 10, 62, 33, 1095, 385, 151, 7, 3, 2, 1095, 1024, 9632, 151, 1713, 9229, 324, 1713, 2138, 1713, 19699, 9229, 324, 1439, 2, 1], 'labels': [150, 5591, 1]}.\n",
|
||
|
"02/17/2022 17:14:00 - INFO - __main__ - Sample 3112 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [10657, 13774, 10, 175, 3075, 56, 129, 25, 2787, 21, 8, 647, 1439, 2, 1713, 3470, 1713, 28984, 1713, 89, 76, 2693, 1713, 14814, 1], 'labels': [150, 5591, 1]}.\n",
|
||
|
"02/17/2022 17:14:00 - INFO - __main__ - Sample 1243 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [10657, 13774, 10, 3320, 10041, 125, 31, 7, 8, 1750, 344, 3, 9, 528, 210, 11, 3, 9, 6871, 58, 3, 9, 6871, 744, 31, 17, 3, 7, 11763, 16, 8, 4836, 5, 10802, 7, 1713, 1924, 210, 1273, 1927, 1050, 1439, 2, 1], 'labels': [5591, 1]}.\n",
|
||
|
"\n",
|
||
|
"Downloading: 0%| | 0.00/2.37k [00:00<?, ?B/s]\n",
|
||
|
"Downloading: 5.67kB [00:00, 1.42MB/s] \n",
|
||
|
"02/17/2022 17:14:02 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/17/2022 17:14:02 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/17/2022 17:14:02 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/17/2022 17:14:02 - INFO - __main__ - Instantaneous batch size per device = 16\n",
|
||
|
"02/17/2022 17:14:02 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 16\n",
|
||
|
"02/17/2022 17:14:02 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/17/2022 17:14:02 - INFO - __main__ - Total optimization steps = 297\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/297 [00:00<?, ?it/s]\n",
|
||
|
" 0%| | 1/297 [00:00<04:27, 1.11it/s]\n",
|
||
|
" 1%| | 2/297 [00:01<04:16, 1.15it/s]\n",
|
||
|
" 1%|1 | 3/297 [00:02<04:22, 1.12it/s]\n",
|
||
|
" 1%|1 | 4/297 [00:03<04:21, 1.12it/s]\n",
|
||
|
" 2%|1 | 5/297 [00:04<04:51, 1.00it/s]\n",
|
||
|
" 2%|2 | 6/297 [00:05<04:41, 1.04it/s]\n",
|
||
|
" 2%|2 | 7/297 [00:06<04:35, 1.05it/s]\n",
|
||
|
" 3%|2 | 8/297 [00:07<04:21, 1.10it/s]\n",
|
||
|
" 3%|3 | 9/297 [00:08<04:21, 1.10it/s]\n",
|
||
|
" 3%|3 | 10/297 [00:09<04:20, 1.10it/s]\n",
|
||
|
" 4%|3 | 11/297 [00:10<04:14, 1.12it/s]\n",
|
||
|
" 4%|4 | 12/297 [00:11<04:19, 1.10it/s]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" 4%|4 | 13/297 [00:11<04:15, 1.11it/s]\n",
|
||
|
" 5%|4 | 14/297 [00:12<04:11, 1.12it/s]\n",
|
||
|
" 5%|5 | 15/297 [00:13<04:07, 1.14it/s]\n",
|
||
|
" 5%|5 | 16/297 [00:14<04:14, 1.10it/s]\n",
|
||
|
" 6%|5 | 17/297 [00:15<04:07, 1.13it/s]\n",
|
||
|
" 6%|6 | 18/297 [00:16<04:10, 1.11it/s]\n",
|
||
|
" 6%|6 | 19/297 [00:17<04:22, 1.06it/s]\n",
|
||
|
" 7%|6 | 20/297 [00:18<04:17, 1.07it/s]\n",
|
||
|
" 7%|7 | 21/297 [00:19<04:19, 1.07it/s]\n",
|
||
|
" 7%|7 | 22/297 [00:20<04:12, 1.09it/s]\n",
|
||
|
" 8%|7 | 23/297 [00:20<04:03, 1.12it/s]\n",
|
||
|
" 8%|8 | 24/297 [00:21<04:01, 1.13it/s]\n",
|
||
|
" 8%|8 | 25/297 [00:22<04:02, 1.12it/s]\n",
|
||
|
" 9%|8 | 26/297 [00:23<04:05, 1.10it/s]\n",
|
||
|
" 9%|9 | 27/297 [00:24<04:01, 1.12it/s]\n",
|
||
|
" 9%|9 | 28/297 [00:25<03:52, 1.16it/s]\n",
|
||
|
" 10%|9 | 29/297 [00:26<03:49, 1.17it/s]\n",
|
||
|
" 10%|# | 30/297 [00:27<03:52, 1.15it/s]\n",
|
||
|
" 10%|# | 31/297 [00:27<03:43, 1.19it/s]\n",
|
||
|
" 11%|# | 32/297 [00:28<03:35, 1.23it/s]\n",
|
||
|
" 11%|#1 | 33/297 [00:29<03:35, 1.23it/s]\n",
|
||
|
" 11%|#1 | 34/297 [00:30<03:30, 1.25it/s]\n",
|
||
|
" 12%|#1 | 35/297 [00:30<03:31, 1.24it/s]\n",
|
||
|
" 12%|#2 | 36/297 [00:31<03:35, 1.21it/s]\n",
|
||
|
" 12%|#2 | 37/297 [00:32<03:30, 1.23it/s]\n",
|
||
|
" 13%|#2 | 38/297 [00:33<03:44, 1.15it/s]\n",
|
||
|
" 13%|#3 | 39/297 [00:34<03:45, 1.14it/s]\n",
|
||
|
" 13%|#3 | 40/297 [00:35<04:07, 1.04it/s]\n",
|
||
|
" 14%|#3 | 41/297 [00:36<03:59, 1.07it/s]\n",
|
||
|
" 14%|#4 | 42/297 [00:37<03:53, 1.09it/s]\n",
|
||
|
" 14%|#4 | 43/297 [00:38<03:42, 1.14it/s]\n",
|
||
|
" 15%|#4 | 44/297 [00:39<03:35, 1.17it/s]\n",
|
||
|
" 15%|#5 | 45/297 [00:39<03:39, 1.15it/s]\n",
|
||
|
" 15%|#5 | 46/297 [00:40<03:43, 1.12it/s]\n",
|
||
|
" 16%|#5 | 47/297 [00:41<03:52, 1.08it/s]\n",
|
||
|
" 16%|#6 | 48/297 [00:42<03:50, 1.08it/s]\n",
|
||
|
" 16%|#6 | 49/297 [00:43<03:43, 1.11it/s]\n",
|
||
|
" 17%|#6 | 50/297 [00:44<03:41, 1.12it/s]\n",
|
||
|
" 17%|#7 | 51/297 [00:45<03:35, 1.14it/s]\n",
|
||
|
" 18%|#7 | 52/297 [00:46<03:42, 1.10it/s]\n",
|
||
|
" 18%|#7 | 53/297 [00:47<03:34, 1.14it/s]\n",
|
||
|
" 18%|#8 | 54/297 [00:47<03:26, 1.18it/s]\n",
|
||
|
" 19%|#8 | 55/297 [00:48<03:28, 1.16it/s]\n",
|
||
|
" 19%|#8 | 56/297 [00:49<03:24, 1.18it/s]\n",
|
||
|
" 19%|#9 | 57/297 [00:50<03:19, 1.20it/s]\n",
|
||
|
" 20%|#9 | 58/297 [00:51<03:19, 1.20it/s]\n",
|
||
|
" 20%|#9 | 59/297 [00:52<03:21, 1.18it/s]\n",
|
||
|
" 20%|## | 60/297 [00:53<03:26, 1.15it/s]\n",
|
||
|
" 21%|## | 61/297 [00:53<03:24, 1.16it/s]\n",
|
||
|
" 21%|## | 62/297 [00:55<03:40, 1.06it/s]\n",
|
||
|
" 21%|##1 | 63/297 [00:55<03:38, 1.07it/s]\n",
|
||
|
" 22%|##1 | 64/297 [00:56<03:30, 1.11it/s]\n",
|
||
|
" 22%|##1 | 65/297 [00:57<03:30, 1.10it/s]\n",
|
||
|
" 22%|##2 | 66/297 [00:58<03:22, 1.14it/s]\n",
|
||
|
" 23%|##2 | 67/297 [00:59<03:25, 1.12it/s]\n",
|
||
|
" 23%|##2 | 68/297 [01:00<03:25, 1.12it/s]\n",
|
||
|
" 23%|##3 | 69/297 [01:01<03:23, 1.12it/s]\n",
|
||
|
" 24%|##3 | 70/297 [01:02<03:17, 1.15it/s]\n",
|
||
|
" 24%|##3 | 71/297 [01:02<03:13, 1.17it/s]\n",
|
||
|
" 24%|##4 | 72/297 [01:03<03:10, 1.18it/s]\n",
|
||
|
" 25%|##4 | 73/297 [01:04<03:14, 1.15it/s]\n",
|
||
|
" 25%|##4 | 74/297 [01:05<03:25, 1.08it/s]\n",
|
||
|
" 25%|##5 | 75/297 [01:06<03:20, 1.11it/s]\n",
|
||
|
" 26%|##5 | 76/297 [01:07<03:18, 1.12it/s]\n",
|
||
|
" 26%|##5 | 77/297 [01:08<03:13, 1.14it/s]\n",
|
||
|
" 26%|##6 | 78/297 [01:08<03:04, 1.18it/s]\n",
|
||
|
" 27%|##6 | 79/297 [01:09<03:12, 1.13it/s]\n",
|
||
|
" 27%|##6 | 80/297 [01:10<03:12, 1.12it/s]\n",
|
||
|
" 27%|##7 | 81/297 [01:11<03:09, 1.14it/s]\n",
|
||
|
" 28%|##7 | 82/297 [01:12<03:08, 1.14it/s]\n",
|
||
|
" 28%|##7 | 83/297 [01:13<03:10, 1.12it/s]\n",
|
||
|
" 28%|##8 | 84/297 [01:14<03:13, 1.10it/s]\n",
|
||
|
" 29%|##8 | 85/297 [01:15<03:09, 1.12it/s]\n",
|
||
|
" 29%|##8 | 86/297 [01:16<03:06, 1.13it/s]\n",
|
||
|
" 29%|##9 | 87/297 [01:17<03:05, 1.13it/s]\n",
|
||
|
" 30%|##9 | 88/297 [01:17<03:06, 1.12it/s]\n",
|
||
|
" 30%|##9 | 89/297 [01:18<03:03, 1.14it/s]\n",
|
||
|
" 30%|### | 90/297 [01:19<03:04, 1.12it/s]\n",
|
||
|
" 31%|### | 91/297 [01:20<03:06, 1.10it/s]\n",
|
||
|
" 31%|### | 92/297 [01:21<03:06, 1.10it/s]\n",
|
||
|
" 31%|###1 | 93/297 [01:22<03:08, 1.08it/s]\n",
|
||
|
" 32%|###1 | 94/297 [01:23<03:15, 1.04it/s]\n",
|
||
|
" 32%|###1 | 95/297 [01:24<03:10, 1.06it/s]\n",
|
||
|
" 32%|###2 | 96/297 [01:25<03:05, 1.08it/s]\n",
|
||
|
" 33%|###2 | 97/297 [01:26<03:01, 1.10it/s]\n",
|
||
|
" 33%|###2 | 98/297 [01:27<03:07, 1.06it/s]\n",
|
||
|
" 33%|###3 | 99/297 [01:28<03:02, 1.09it/s]\n",
|
||
|
" 34%|###3 | 100/297 [01:29<02:59, 1.10it/s]\n",
|
||
|
" 34%|###4 | 101/297 [01:29<02:59, 1.09it/s]\n",
|
||
|
" 34%|###4 | 102/297 [01:30<02:56, 1.11it/s]\n",
|
||
|
" 35%|###4 | 103/297 [01:31<02:58, 1.09it/s]\n",
|
||
|
" 35%|###5 | 104/297 [01:32<02:58, 1.08it/s]\n",
|
||
|
" 35%|###5 | 105/297 [01:33<02:56, 1.09it/s]\n",
|
||
|
" 36%|###5 | 106/297 [01:34<02:53, 1.10it/s]\n",
|
||
|
" 36%|###6 | 107/297 [01:35<02:55, 1.08it/s]\n",
|
||
|
" 36%|###6 | 108/297 [01:36<02:51, 1.10it/s]\n",
|
||
|
" 37%|###6 | 109/297 [01:37<02:51, 1.09it/s]\n",
|
||
|
" 37%|###7 | 110/297 [01:38<02:54, 1.07it/s]\n",
|
||
|
" 37%|###7 | 111/297 [01:39<02:51, 1.09it/s]\n",
|
||
|
" 38%|###7 | 112/297 [01:40<02:49, 1.09it/s]\n",
|
||
|
" 38%|###8 | 113/297 [01:40<02:46, 1.10it/s]\n",
|
||
|
" 38%|###8 | 114/297 [01:41<02:43, 1.12it/s]\n",
|
||
|
" 39%|###8 | 115/297 [01:42<02:42, 1.12it/s]\n",
|
||
|
" 39%|###9 | 116/297 [01:43<02:38, 1.14it/s]\n",
|
||
|
" 39%|###9 | 117/297 [01:44<02:39, 1.13it/s]\n",
|
||
|
" 40%|###9 | 118/297 [01:45<02:44, 1.08it/s]\n",
|
||
|
" 40%|#### | 119/297 [01:46<02:41, 1.10it/s]\n",
|
||
|
" 40%|#### | 120/297 [01:47<02:38, 1.12it/s]\n",
|
||
|
" 41%|#### | 121/297 [01:48<02:44, 1.07it/s]\n",
|
||
|
" 41%|####1 | 122/297 [01:49<02:40, 1.09it/s]\n",
|
||
|
" 41%|####1 | 123/297 [01:49<02:36, 1.11it/s]\n",
|
||
|
" 42%|####1 | 124/297 [01:50<02:36, 1.11it/s]\n",
|
||
|
" 42%|####2 | 125/297 [01:51<02:35, 1.11it/s]\n",
|
||
|
" 42%|####2 | 126/297 [01:52<02:35, 1.10it/s]\n",
|
||
|
" 43%|####2 | 127/297 [01:53<02:34, 1.10it/s]\n",
|
||
|
" 43%|####3 | 128/297 [01:54<02:31, 1.12it/s]\n",
|
||
|
" 43%|####3 | 129/297 [01:55<02:35, 1.08it/s]\n",
|
||
|
" 44%|####3 | 130/297 [01:56<02:29, 1.12it/s]\n",
|
||
|
" 44%|####4 | 131/297 [01:57<02:28, 1.12it/s]\n",
|
||
|
" 44%|####4 | 132/297 [01:58<02:28, 1.11it/s]\n",
|
||
|
" 45%|####4 | 133/297 [01:58<02:26, 1.12it/s]\n",
|
||
|
" 45%|####5 | 134/297 [01:59<02:27, 1.11it/s]\n",
|
||
|
" 45%|####5 | 135/297 [02:00<02:27, 1.10it/s]\n",
|
||
|
" 46%|####5 | 136/297 [02:01<02:25, 1.10it/s]\n",
|
||
|
" 46%|####6 | 137/297 [02:02<02:26, 1.09it/s]\n",
|
||
|
" 46%|####6 | 138/297 [02:03<02:22, 1.11it/s]\n",
|
||
|
" 47%|####6 | 139/297 [02:04<02:21, 1.11it/s]\n",
|
||
|
" 47%|####7 | 140/297 [02:05<02:21, 1.11it/s]\n",
|
||
|
" 47%|####7 | 141/297 [02:06<02:23, 1.09it/s]\n",
|
||
|
" 48%|####7 | 142/297 [02:07<02:18, 1.12it/s]\n",
|
||
|
" 48%|####8 | 143/297 [02:07<02:17, 1.12it/s]\n",
|
||
|
" 48%|####8 | 144/297 [02:08<02:14, 1.14it/s]\n",
|
||
|
" 49%|####8 | 145/297 [02:09<02:14, 1.13it/s]\n",
|
||
|
" 49%|####9 | 146/297 [02:10<02:11, 1.15it/s]\n",
|
||
|
" 49%|####9 | 147/297 [02:11<02:12, 1.13it/s]\n",
|
||
|
" 50%|####9 | 148/297 [02:12<02:10, 1.14it/s]\n",
|
||
|
" 50%|##### | 149/297 [02:13<02:08, 1.15it/s]\n",
|
||
|
" 51%|##### | 150/297 [02:14<02:13, 1.10it/s]\n",
|
||
|
" 51%|##### | 151/297 [02:15<02:10, 1.12it/s]\n",
|
||
|
" 51%|#####1 | 152/297 [02:15<02:11, 1.11it/s]\n",
|
||
|
" 52%|#####1 | 153/297 [02:16<02:08, 1.12it/s]\n",
|
||
|
" 52%|#####1 | 154/297 [02:17<02:08, 1.11it/s]\n",
|
||
|
" 52%|#####2 | 155/297 [02:18<02:08, 1.11it/s]\n",
|
||
|
" 53%|#####2 | 156/297 [02:19<02:09, 1.09it/s]\n",
|
||
|
" 53%|#####2 | 157/297 [02:20<02:07, 1.10it/s]\n",
|
||
|
" 53%|#####3 | 158/297 [02:21<02:04, 1.12it/s]\n",
|
||
|
" 54%|#####3 | 159/297 [02:22<02:02, 1.13it/s]\n",
|
||
|
" 54%|#####3 | 160/297 [02:23<02:01, 1.13it/s]\n",
|
||
|
" 54%|#####4 | 161/297 [02:23<01:56, 1.17it/s]\n",
|
||
|
" 55%|#####4 | 162/297 [02:24<01:56, 1.16it/s]\n",
|
||
|
" 55%|#####4 | 163/297 [02:25<01:59, 1.12it/s]\n",
|
||
|
" 55%|#####5 | 164/297 [02:26<01:58, 1.12it/s]\n",
|
||
|
" 56%|#####5 | 165/297 [02:27<01:55, 1.14it/s]\n",
|
||
|
" 56%|#####5 | 166/297 [02:28<01:55, 1.14it/s]\n",
|
||
|
" 56%|#####6 | 167/297 [02:29<01:54, 1.14it/s]\n",
|
||
|
" 57%|#####6 | 168/297 [02:30<01:54, 1.12it/s]\n",
|
||
|
" 57%|#####6 | 169/297 [02:31<02:02, 1.05it/s]\n",
|
||
|
" 57%|#####7 | 170/297 [02:32<01:59, 1.06it/s]\n",
|
||
|
" 58%|#####7 | 171/297 [02:33<01:57, 1.07it/s]\n",
|
||
|
" 58%|#####7 | 172/297 [02:34<02:17, 1.10s/it]\n",
|
||
|
" 58%|#####8 | 173/297 [02:35<02:11, 1.06s/it]\n",
|
||
|
" 59%|#####8 | 174/297 [02:36<02:05, 1.02s/it]\n",
|
||
|
" 59%|#####8 | 175/297 [02:37<01:58, 1.03it/s]\n",
|
||
|
" 59%|#####9 | 176/297 [02:38<01:56, 1.04it/s]\n",
|
||
|
" 60%|#####9 | 177/297 [02:39<01:57, 1.02it/s]\n",
|
||
|
" 60%|#####9 | 178/297 [02:40<01:56, 1.02it/s]\n",
|
||
|
" 60%|###### | 179/297 [02:41<01:53, 1.04it/s]\n",
|
||
|
" 61%|###### | 180/297 [02:42<01:49, 1.07it/s]\n",
|
||
|
" 61%|###### | 181/297 [02:42<01:47, 1.08it/s]\n",
|
||
|
" 61%|######1 | 182/297 [02:43<01:45, 1.09it/s]\n",
|
||
|
" 62%|######1 | 183/297 [02:44<01:43, 1.10it/s]\n",
|
||
|
" 62%|######1 | 184/297 [02:45<01:43, 1.09it/s]\n",
|
||
|
" 62%|######2 | 185/297 [02:46<01:45, 1.06it/s]\n",
|
||
|
" 63%|######2 | 186/297 [02:47<01:43, 1.08it/s]\n",
|
||
|
" 63%|######2 | 187/297 [02:48<01:45, 1.05it/s]\n",
|
||
|
" 63%|######3 | 188/297 [02:49<01:40, 1.09it/s]\n",
|
||
|
" 64%|######3 | 189/297 [02:50<01:37, 1.11it/s]\n",
|
||
|
" 64%|######3 | 190/297 [02:51<01:35, 1.11it/s]\n",
|
||
|
" 64%|######4 | 191/297 [02:52<01:36, 1.10it/s]\n",
|
||
|
" 65%|######4 | 192/297 [02:53<01:35, 1.10it/s]\n",
|
||
|
" 65%|######4 | 193/297 [02:54<01:37, 1.06it/s]\n",
|
||
|
" 65%|######5 | 194/297 [02:54<01:35, 1.07it/s]\n",
|
||
|
" 66%|######5 | 195/297 [02:55<01:37, 1.05it/s]\n",
|
||
|
" 66%|######5 | 196/297 [02:56<01:32, 1.10it/s]\n",
|
||
|
" 66%|######6 | 197/297 [02:57<01:29, 1.12it/s]\n",
|
||
|
" 67%|######6 | 198/297 [02:58<01:41, 1.02s/it]\n",
|
||
|
" 67%|######7 | 199/297 [03:00<01:41, 1.03s/it]\n",
|
||
|
" 67%|######7 | 200/297 [03:00<01:37, 1.01s/it]\n",
|
||
|
" 68%|######7 | 201/297 [03:01<01:32, 1.04it/s]\n",
|
||
|
" 68%|######8 | 202/297 [03:02<01:27, 1.09it/s]\n",
|
||
|
" 68%|######8 | 203/297 [03:03<01:24, 1.12it/s]\n",
|
||
|
" 69%|######8 | 204/297 [03:04<01:23, 1.11it/s]\n",
|
||
|
" 69%|######9 | 205/297 [03:05<01:26, 1.06it/s]\n",
|
||
|
" 69%|######9 | 206/297 [03:06<01:28, 1.03it/s]\n",
|
||
|
" 70%|######9 | 207/297 [03:07<01:26, 1.05it/s]\n",
|
||
|
" 70%|####### | 208/297 [03:08<01:26, 1.02it/s]\n",
|
||
|
" 70%|####### | 209/297 [03:09<01:22, 1.07it/s]\n",
|
||
|
" 71%|####### | 210/297 [03:10<01:21, 1.06it/s]\n",
|
||
|
" 71%|#######1 | 211/297 [03:11<01:19, 1.08it/s]\n",
|
||
|
" 71%|#######1 | 212/297 [03:12<01:18, 1.09it/s]\n",
|
||
|
" 72%|#######1 | 213/297 [03:12<01:17, 1.08it/s]\n",
|
||
|
" 72%|#######2 | 214/297 [03:13<01:17, 1.07it/s]\n",
|
||
|
" 72%|#######2 | 215/297 [03:14<01:15, 1.08it/s]\n",
|
||
|
" 73%|#######2 | 216/297 [03:15<01:14, 1.08it/s]\n",
|
||
|
" 73%|#######3 | 217/297 [03:16<01:12, 1.10it/s]\n",
|
||
|
" 73%|#######3 | 218/297 [03:17<01:10, 1.12it/s]\n",
|
||
|
" 74%|#######3 | 219/297 [03:18<01:10, 1.11it/s]\n",
|
||
|
" 74%|#######4 | 220/297 [03:19<01:09, 1.11it/s]\n",
|
||
|
" 74%|#######4 | 221/297 [03:20<01:08, 1.12it/s]\n",
|
||
|
" 75%|#######4 | 222/297 [03:20<01:05, 1.14it/s]\n",
|
||
|
" 75%|#######5 | 223/297 [03:21<01:05, 1.13it/s]\n",
|
||
|
" 75%|#######5 | 224/297 [03:22<01:05, 1.11it/s]\n",
|
||
|
" 76%|#######5 | 225/297 [03:23<01:07, 1.06it/s]\n",
|
||
|
" 76%|#######6 | 226/297 [03:24<01:04, 1.09it/s]\n",
|
||
|
" 76%|#######6 | 227/297 [03:25<01:03, 1.10it/s]\n",
|
||
|
" 77%|#######6 | 228/297 [03:26<01:01, 1.11it/s]\n",
|
||
|
" 77%|#######7 | 229/297 [03:27<01:01, 1.10it/s]\n",
|
||
|
" 77%|#######7 | 230/297 [03:28<01:01, 1.09it/s]\n",
|
||
|
" 78%|#######7 | 231/297 [03:29<00:58, 1.13it/s]\n",
|
||
|
" 78%|#######8 | 232/297 [03:29<00:55, 1.18it/s]\n",
|
||
|
" 78%|#######8 | 233/297 [03:30<00:54, 1.18it/s]\n",
|
||
|
" 79%|#######8 | 234/297 [03:31<00:55, 1.14it/s]\n",
|
||
|
" 79%|#######9 | 235/297 [03:32<00:55, 1.12it/s]\n",
|
||
|
" 79%|#######9 | 236/297 [03:33<00:55, 1.10it/s]\n",
|
||
|
" 80%|#######9 | 237/297 [03:34<00:54, 1.11it/s]\n",
|
||
|
" 80%|######## | 238/297 [03:35<00:54, 1.08it/s]\n",
|
||
|
" 80%|######## | 239/297 [03:36<00:54, 1.06it/s]\n",
|
||
|
" 81%|######## | 240/297 [03:37<00:52, 1.09it/s]\n",
|
||
|
" 81%|########1 | 241/297 [03:38<00:51, 1.08it/s]\n",
|
||
|
" 81%|########1 | 242/297 [03:39<00:49, 1.10it/s]\n",
|
||
|
" 82%|########1 | 243/297 [03:39<00:47, 1.13it/s]\n",
|
||
|
" 82%|########2 | 244/297 [03:40<00:46, 1.14it/s]\n",
|
||
|
" 82%|########2 | 245/297 [03:41<00:46, 1.12it/s]\n",
|
||
|
" 83%|########2 | 246/297 [03:42<00:45, 1.11it/s]\n",
|
||
|
" 83%|########3 | 247/297 [03:43<00:44, 1.11it/s]\n",
|
||
|
" 84%|########3 | 248/297 [03:44<00:48, 1.00it/s]\n",
|
||
|
" 84%|########3 | 249/297 [03:45<00:47, 1.01it/s]\n",
|
||
|
" 84%|########4 | 250/297 [03:46<00:45, 1.03it/s]\n",
|
||
|
" 85%|########4 | 251/297 [03:47<00:44, 1.03it/s]\n",
|
||
|
" 85%|########4 | 252/297 [03:48<00:42, 1.05it/s]\n",
|
||
|
" 85%|########5 | 253/297 [03:49<00:41, 1.07it/s]\n",
|
||
|
" 86%|########5 | 254/297 [03:50<00:40, 1.06it/s]\n",
|
||
|
" 86%|########5 | 255/297 [03:51<00:40, 1.04it/s]\n",
|
||
|
" 86%|########6 | 256/297 [03:52<00:40, 1.01it/s]\n",
|
||
|
" 87%|########6 | 257/297 [03:53<00:38, 1.05it/s]\n",
|
||
|
" 87%|########6 | 258/297 [03:54<00:36, 1.06it/s]\n",
|
||
|
" 87%|########7 | 259/297 [03:55<00:35, 1.08it/s]\n",
|
||
|
" 88%|########7 | 260/297 [03:56<00:34, 1.07it/s]\n",
|
||
|
" 88%|########7 | 261/297 [03:57<00:33, 1.08it/s]\n",
|
||
|
" 88%|########8 | 262/297 [03:57<00:32, 1.06it/s]\n",
|
||
|
" 89%|########8 | 263/297 [03:58<00:31, 1.08it/s]\n",
|
||
|
" 89%|########8 | 264/297 [03:59<00:31, 1.05it/s]\n",
|
||
|
" 89%|########9 | 265/297 [04:00<00:30, 1.04it/s]\n",
|
||
|
" 90%|########9 | 266/297 [04:01<00:30, 1.01it/s]\n",
|
||
|
" 90%|########9 | 267/297 [04:02<00:28, 1.06it/s]\n",
|
||
|
" 90%|######### | 268/297 [04:03<00:26, 1.08it/s]\n",
|
||
|
" 91%|######### | 269/297 [04:04<00:26, 1.04it/s]\n",
|
||
|
" 91%|######### | 270/297 [04:05<00:25, 1.04it/s]\n",
|
||
|
" 91%|#########1| 271/297 [04:06<00:23, 1.09it/s]\n",
|
||
|
" 92%|#########1| 272/297 [04:07<00:23, 1.07it/s]\n",
|
||
|
" 92%|#########1| 273/297 [04:08<00:22, 1.08it/s]\n",
|
||
|
" 92%|#########2| 274/297 [04:09<00:21, 1.08it/s]\n",
|
||
|
" 93%|#########2| 275/297 [04:10<00:20, 1.08it/s]\n",
|
||
|
" 93%|#########2| 276/297 [04:11<00:19, 1.10it/s]\n",
|
||
|
" 93%|#########3| 277/297 [04:12<00:18, 1.06it/s]\n",
|
||
|
" 94%|#########3| 278/297 [04:12<00:17, 1.07it/s]\n",
|
||
|
" 94%|#########3| 279/297 [04:13<00:16, 1.07it/s]\n",
|
||
|
" 94%|#########4| 280/297 [04:14<00:16, 1.06it/s]\n",
|
||
|
" 95%|#########4| 281/297 [04:15<00:14, 1.07it/s]\n",
|
||
|
" 95%|#########4| 282/297 [04:16<00:13, 1.11it/s]\n",
|
||
|
" 95%|#########5| 283/297 [04:17<00:12, 1.09it/s]\n",
|
||
|
" 96%|#########5| 284/297 [04:18<00:13, 1.00s/it]\n",
|
||
|
" 96%|#########5| 285/297 [04:19<00:11, 1.01it/s]\n",
|
||
|
" 96%|#########6| 286/297 [04:20<00:10, 1.04it/s]\n",
|
||
|
" 97%|#########6| 287/297 [04:21<00:09, 1.08it/s]\n",
|
||
|
" 97%|#########6| 288/297 [04:22<00:08, 1.10it/s]\n",
|
||
|
" 97%|#########7| 289/297 [04:23<00:07, 1.08it/s]\n",
|
||
|
" 98%|#########7| 290/297 [04:24<00:06, 1.11it/s]\n",
|
||
|
" 98%|#########7| 291/297 [04:25<00:05, 1.09it/s]\n",
|
||
|
" 98%|#########8| 292/297 [04:26<00:04, 1.07it/s]\n",
|
||
|
" 99%|#########8| 293/297 [04:26<00:03, 1.10it/s]\n",
|
||
|
" 99%|#########8| 294/297 [04:27<00:02, 1.12it/s]\n",
|
||
|
" 99%|#########9| 295/297 [04:28<00:01, 1.07it/s]\n",
|
||
|
"100%|#########9| 296/297 [04:29<00:00, 1.03it/s]\n",
|
||
|
"100%|##########| 297/297 [04:30<00:00, 1.19it/s]02/17/2022 17:18:41 - INFO - __main__ - Validation-set | bleu: 0.0 | accuracy: 1.0\n",
|
||
|
"02/17/2022 17:18:49 - INFO - __main__ - Test-set | bleu: 0.0 | accuracy: 1.0\n",
|
||
|
"Configuration saved in out/tweet/t5\\config.json\n",
|
||
|
"Model weights saved in out/tweet/t5\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/t5\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/t5\\special_tokens_map.json\n",
|
||
|
"Copy vocab file to out/tweet/t5\\spiece.model\n",
|
||
|
"\n",
|
||
|
"100%|##########| 297/297 [04:46<00:00, 1.04it/s]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_translation_no_trainer.py \\\n",
|
||
|
" --model_name_or_path t5-small \\\n",
|
||
|
" --train_file data/translations-train.json \\\n",
|
||
|
" --validation_file data/translations-valid.json \\\n",
|
||
|
" --test_file data/translations-test.json \\\n",
|
||
|
" --per_device_train_batch_size 16 \\\n",
|
||
|
" --per_device_eval_batch_size 16 \\\n",
|
||
|
" --source_prefix \"tweet classification\" \\\n",
|
||
|
" --max_source_length 256 \\\n",
|
||
|
" --max_target_length 128 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --output_dir out/tweet/t5"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# T5 version 2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 18,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/17/2022 17:23:00 - INFO - __main__ - Distributed environment: NO\n",
|
||
|
"Num processes: 1\n",
|
||
|
"Process index: 0\n",
|
||
|
"Local process index: 0\n",
|
||
|
"Device: cpu\n",
|
||
|
"Use FP16 precision: False\n",
|
||
|
"\n",
|
||
|
"02/17/2022 17:23:00 - WARNING - datasets.builder - Using custom data configuration default-c1907d9305fb2fbb\n",
|
||
|
"02/17/2022 17:23:00 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-c1907d9305fb2fbb\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/3 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 3/3 [00:00<00:00, 1504.41it/s]\n",
|
||
|
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
|
||
|
"Model config T5Config {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"T5WithLMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"d_ff\": 2048,\n",
|
||
|
" \"d_kv\": 64,\n",
|
||
|
" \"d_model\": 512,\n",
|
||
|
" \"decoder_start_token_id\": 0,\n",
|
||
|
" \"dropout_rate\": 0.1,\n",
|
||
|
" \"eos_token_id\": 1,\n",
|
||
|
" \"feed_forward_proj\": \"relu\",\n",
|
||
|
" \"initializer_factor\": 1.0,\n",
|
||
|
" \"is_encoder_decoder\": true,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-06,\n",
|
||
|
" \"model_type\": \"t5\",\n",
|
||
|
" \"n_positions\": 512,\n",
|
||
|
" \"num_decoder_layers\": 6,\n",
|
||
|
" \"num_heads\": 8,\n",
|
||
|
" \"num_layers\": 6,\n",
|
||
|
" \"output_past\": true,\n",
|
||
|
" \"pad_token_id\": 0,\n",
|
||
|
" \"relative_attention_num_buckets\": 32,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"summarization\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"length_penalty\": 2.0,\n",
|
||
|
" \"max_length\": 200,\n",
|
||
|
" \"min_length\": 30,\n",
|
||
|
" \"no_repeat_ngram_size\": 3,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"summarize: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_de\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to German: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_fr\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to French: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_ro\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to Romanian: \"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 32128\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
|
||
|
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
|
||
|
"Model config T5Config {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"T5WithLMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"d_ff\": 2048,\n",
|
||
|
" \"d_kv\": 64,\n",
|
||
|
" \"d_model\": 512,\n",
|
||
|
" \"decoder_start_token_id\": 0,\n",
|
||
|
" \"dropout_rate\": 0.1,\n",
|
||
|
" \"eos_token_id\": 1,\n",
|
||
|
" \"feed_forward_proj\": \"relu\",\n",
|
||
|
" \"initializer_factor\": 1.0,\n",
|
||
|
" \"is_encoder_decoder\": true,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-06,\n",
|
||
|
" \"model_type\": \"t5\",\n",
|
||
|
" \"n_positions\": 512,\n",
|
||
|
" \"num_decoder_layers\": 6,\n",
|
||
|
" \"num_heads\": 8,\n",
|
||
|
" \"num_layers\": 6,\n",
|
||
|
" \"output_past\": true,\n",
|
||
|
" \"pad_token_id\": 0,\n",
|
||
|
" \"relative_attention_num_buckets\": 32,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"summarization\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"length_penalty\": 2.0,\n",
|
||
|
" \"max_length\": 200,\n",
|
||
|
" \"min_length\": 30,\n",
|
||
|
" \"no_repeat_ngram_size\": 3,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"summarize: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_de\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to German: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_fr\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to French: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_ro\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to Romanian: \"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 32128\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/spiece.model from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\65fc04e21f45f61430aea0c4fedffac16a4d20d78b8e6601d8d996ebefefecd2.3b69006860e7b5d0a63ffdddc01ddcd6b7c318a6f4fd793596552c741734c62d\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/tokenizer.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\06779097c78e12f47ef67ecb728810c2ae757ee0a9efe9390c6419783d99382d.8627f1bd5d270a9fd2e5a51c8bec3223896587cc3cfe13edeabb0992ab43c529\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/added_tokens.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/special_tokens_map.json from cache at None\n",
|
||
|
"loading file https://huggingface.co/t5-small/resolve/main/tokenizer_config.json from cache at None\n",
|
||
|
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
|
||
|
"Model config T5Config {\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"T5WithLMHeadModel\"\n",
|
||
|
" ],\n",
|
||
|
" \"d_ff\": 2048,\n",
|
||
|
" \"d_kv\": 64,\n",
|
||
|
" \"d_model\": 512,\n",
|
||
|
" \"decoder_start_token_id\": 0,\n",
|
||
|
" \"dropout_rate\": 0.1,\n",
|
||
|
" \"eos_token_id\": 1,\n",
|
||
|
" \"feed_forward_proj\": \"relu\",\n",
|
||
|
" \"initializer_factor\": 1.0,\n",
|
||
|
" \"is_encoder_decoder\": true,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-06,\n",
|
||
|
" \"model_type\": \"t5\",\n",
|
||
|
" \"n_positions\": 512,\n",
|
||
|
" \"num_decoder_layers\": 6,\n",
|
||
|
" \"num_heads\": 8,\n",
|
||
|
" \"num_layers\": 6,\n",
|
||
|
" \"output_past\": true,\n",
|
||
|
" \"pad_token_id\": 0,\n",
|
||
|
" \"relative_attention_num_buckets\": 32,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"summarization\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"length_penalty\": 2.0,\n",
|
||
|
" \"max_length\": 200,\n",
|
||
|
" \"min_length\": 30,\n",
|
||
|
" \"no_repeat_ngram_size\": 3,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"summarize: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_de\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to German: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_fr\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to French: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_ro\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to Romanian: \"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 32128\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"loading weights file https://huggingface.co/t5-small/resolve/main/pytorch_model.bin from cache at C:\\Users\\Foka/.cache\\huggingface\\transformers\\fee5a3a0ae379232608b6eed45d2d7a0d2966b9683728838412caccc41b4b0ed.ddacdc89ec88482db20c676f0861a336f3d0409f94748c209847b49529d73885\n",
|
||
|
"All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
|
||
|
"\n",
|
||
|
"All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at t5-small.\n",
|
||
|
"If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
|
||
|
"02/17/2022 17:23:07 - INFO - __main__ - Freezing model weights\n",
|
||
|
"02/17/2022 17:23:07 - INFO - __main__ - Using translation prefix: \"tweet classification: \"\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/5 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 80%|######## | 4/5 [00:00<00:00, 31.58ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 5/5 [00:00<00:00, 33.64ba/s]\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 66.85ba/s]\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 77.13ba/s]\n",
|
||
|
"02/17/2022 17:23:07 - INFO - __main__ - Sample 4497 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [10657, 13774, 10, 34, 31, 7, 16713, 239, 3158, 3, 2, 1], 'labels': [150, 5591, 1]}.\n",
|
||
|
"02/17/2022 17:23:07 - INFO - __main__ - Sample 697 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [10657, 13774, 10, 3320, 10041, 3, 6631, 7, 55, 3, 23, 410, 34, 541, 55, 3, 19293, 430, 18659, 2983, 89, 16948, 55, 1713, 7, 9, 26, 1713, 7, 127, 15, 2298, 49, 3, 24778, 1713, 1788, 6938, 2910, 29, 53, 1], 'labels': [5591, 1]}.\n",
|
||
|
"02/17/2022 17:23:07 - INFO - __main__ - Sample 3411 of the training set: {'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'input_ids': [10657, 13774, 10, 8441, 352, 12, 217, 3320, 10041, 16, 20, 75, 3, 10266, 55, 1], 'labels': [150, 5591, 1]}.\n",
|
||
|
"02/17/2022 17:23:09 - INFO - __main__ - ***** Running training *****\n",
|
||
|
"02/17/2022 17:23:09 - INFO - __main__ - Num examples = 4742\n",
|
||
|
"02/17/2022 17:23:09 - INFO - __main__ - Num Epochs = 1\n",
|
||
|
"02/17/2022 17:23:09 - INFO - __main__ - Instantaneous batch size per device = 16\n",
|
||
|
"02/17/2022 17:23:09 - INFO - __main__ - Total train batch size (w. parallel, distributed & accumulation) = 16\n",
|
||
|
"02/17/2022 17:23:09 - INFO - __main__ - Gradient Accumulation steps = 1\n",
|
||
|
"02/17/2022 17:23:09 - INFO - __main__ - Total optimization steps = 297\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/297 [00:00<?, ?it/s]\n",
|
||
|
" 0%| | 1/297 [00:00<02:34, 1.92it/s]\n",
|
||
|
" 1%| | 2/297 [00:00<02:08, 2.29it/s]\n",
|
||
|
" 1%|1 | 3/297 [00:01<01:58, 2.47it/s]\n",
|
||
|
" 1%|1 | 4/297 [00:01<01:53, 2.58it/s]\n",
|
||
|
" 2%|1 | 5/297 [00:02<01:59, 2.45it/s]\n",
|
||
|
" 2%|2 | 6/297 [00:02<02:02, 2.37it/s]\n",
|
||
|
" 2%|2 | 7/297 [00:02<01:58, 2.46it/s]\n",
|
||
|
" 3%|2 | 8/297 [00:03<01:53, 2.55it/s]\n",
|
||
|
" 3%|3 | 9/297 [00:03<02:14, 2.14it/s]\n",
|
||
|
" 3%|3 | 10/297 [00:04<02:12, 2.17it/s]\n",
|
||
|
" 4%|3 | 11/297 [00:04<02:05, 2.28it/s]\n",
|
||
|
" 4%|4 | 12/297 [00:05<02:01, 2.34it/s]\n",
|
||
|
" 4%|4 | 13/297 [00:05<02:00, 2.36it/s]\n",
|
||
|
" 5%|4 | 14/297 [00:05<01:57, 2.42it/s]\n",
|
||
|
" 5%|5 | 15/297 [00:06<01:54, 2.45it/s]\n",
|
||
|
" 5%|5 | 16/297 [00:06<01:50, 2.54it/s]\n",
|
||
|
" 6%|5 | 17/297 [00:07<01:49, 2.57it/s]\n",
|
||
|
" 6%|6 | 18/297 [00:07<01:48, 2.58it/s]\n",
|
||
|
" 6%|6 | 19/297 [00:07<01:50, 2.53it/s]\n",
|
||
|
" 7%|6 | 20/297 [00:08<01:52, 2.46it/s]\n",
|
||
|
" 7%|7 | 21/297 [00:08<01:49, 2.53it/s]\n",
|
||
|
" 7%|7 | 22/297 [00:09<01:52, 2.45it/s]\n",
|
||
|
" 8%|7 | 23/297 [00:09<01:48, 2.52it/s]\n",
|
||
|
" 8%|8 | 24/297 [00:09<01:47, 2.55it/s]\n",
|
||
|
" 8%|8 | 25/297 [00:10<01:48, 2.51it/s]\n",
|
||
|
" 9%|8 | 26/297 [00:10<01:45, 2.56it/s]\n",
|
||
|
" 9%|9 | 27/297 [00:11<01:45, 2.56it/s]\n",
|
||
|
" 9%|9 | 28/297 [00:11<01:45, 2.55it/s]\n",
|
||
|
" 10%|9 | 29/297 [00:11<01:45, 2.54it/s]\n",
|
||
|
" 10%|# | 30/297 [00:12<01:41, 2.62it/s]\n",
|
||
|
" 10%|# | 31/297 [00:12<01:42, 2.59it/s]\n",
|
||
|
" 11%|# | 32/297 [00:12<01:39, 2.66it/s]\n",
|
||
|
" 11%|#1 | 33/297 [00:13<01:39, 2.64it/s]\n",
|
||
|
" 11%|#1 | 34/297 [00:13<01:38, 2.67it/s]\n",
|
||
|
" 12%|#1 | 35/297 [00:14<01:38, 2.67it/s]\n",
|
||
|
" 12%|#2 | 36/297 [00:14<01:40, 2.60it/s]\n",
|
||
|
" 12%|#2 | 37/297 [00:14<01:45, 2.46it/s]\n",
|
||
|
" 13%|#2 | 38/297 [00:15<01:44, 2.48it/s]\n",
|
||
|
" 13%|#3 | 39/297 [00:15<01:42, 2.52it/s]\n",
|
||
|
" 13%|#3 | 40/297 [00:16<01:41, 2.53it/s]\n",
|
||
|
" 14%|#3 | 41/297 [00:16<01:42, 2.50it/s]\n",
|
||
|
" 14%|#4 | 42/297 [00:16<01:37, 2.62it/s]\n",
|
||
|
" 14%|#4 | 43/297 [00:17<01:38, 2.59it/s]\n",
|
||
|
" 15%|#4 | 44/297 [00:17<01:40, 2.52it/s]\n",
|
||
|
" 15%|#5 | 45/297 [00:18<01:37, 2.59it/s]\n",
|
||
|
" 15%|#5 | 46/297 [00:18<01:37, 2.59it/s]\n",
|
||
|
" 16%|#5 | 47/297 [00:18<01:37, 2.56it/s]\n",
|
||
|
" 16%|#6 | 48/297 [00:19<01:36, 2.57it/s]\n",
|
||
|
" 16%|#6 | 49/297 [00:19<01:36, 2.58it/s]\n",
|
||
|
" 17%|#6 | 50/297 [00:19<01:35, 2.59it/s]\n",
|
||
|
" 17%|#7 | 51/297 [00:20<01:35, 2.56it/s]\n",
|
||
|
" 18%|#7 | 52/297 [00:20<01:34, 2.58it/s]\n",
|
||
|
" 18%|#7 | 53/297 [00:21<01:34, 2.58it/s]\n",
|
||
|
" 18%|#8 | 54/297 [00:21<01:33, 2.60it/s]\n",
|
||
|
" 19%|#8 | 55/297 [00:21<01:34, 2.55it/s]\n",
|
||
|
" 19%|#8 | 56/297 [00:22<01:34, 2.54it/s]\n",
|
||
|
" 19%|#9 | 57/297 [00:22<01:36, 2.50it/s]\n",
|
||
|
" 20%|#9 | 58/297 [00:23<01:35, 2.52it/s]\n",
|
||
|
" 20%|#9 | 59/297 [00:23<01:33, 2.55it/s]\n",
|
||
|
" 20%|## | 60/297 [00:23<01:31, 2.58it/s]\n",
|
||
|
" 21%|## | 61/297 [00:24<01:36, 2.46it/s]\n",
|
||
|
" 21%|## | 62/297 [00:24<01:32, 2.53it/s]\n",
|
||
|
" 21%|##1 | 63/297 [00:25<01:35, 2.45it/s]\n",
|
||
|
" 22%|##1 | 64/297 [00:25<01:32, 2.53it/s]\n",
|
||
|
" 22%|##1 | 65/297 [00:25<01:32, 2.51it/s]\n",
|
||
|
" 22%|##2 | 66/297 [00:26<01:30, 2.54it/s]\n",
|
||
|
" 23%|##2 | 67/297 [00:26<01:30, 2.54it/s]\n",
|
||
|
" 23%|##2 | 68/297 [00:27<01:36, 2.38it/s]\n",
|
||
|
" 23%|##3 | 69/297 [00:27<01:32, 2.46it/s]\n",
|
||
|
" 24%|##3 | 70/297 [00:27<01:33, 2.43it/s]\n",
|
||
|
" 24%|##3 | 71/297 [00:28<01:29, 2.52it/s]\n",
|
||
|
" 24%|##4 | 72/297 [00:28<01:29, 2.52it/s]\n",
|
||
|
" 25%|##4 | 73/297 [00:29<01:29, 2.49it/s]\n",
|
||
|
" 25%|##4 | 74/297 [00:29<01:31, 2.43it/s]\n",
|
||
|
" 25%|##5 | 75/297 [00:29<01:32, 2.39it/s]\n",
|
||
|
" 26%|##5 | 76/297 [00:30<01:31, 2.42it/s]\n",
|
||
|
" 26%|##5 | 77/297 [00:30<01:31, 2.40it/s]\n",
|
||
|
" 26%|##6 | 78/297 [00:31<01:29, 2.45it/s]\n",
|
||
|
" 27%|##6 | 79/297 [00:31<01:27, 2.48it/s]\n",
|
||
|
" 27%|##6 | 80/297 [00:31<01:26, 2.51it/s]\n",
|
||
|
" 27%|##7 | 81/297 [00:32<01:25, 2.53it/s]\n",
|
||
|
" 28%|##7 | 82/297 [00:32<01:26, 2.48it/s]\n",
|
||
|
" 28%|##7 | 83/297 [00:33<01:26, 2.47it/s]\n",
|
||
|
" 28%|##8 | 84/297 [00:33<01:29, 2.38it/s]\n",
|
||
|
" 29%|##8 | 85/297 [00:34<01:25, 2.49it/s]\n",
|
||
|
" 29%|##8 | 86/297 [00:34<01:23, 2.53it/s]\n",
|
||
|
" 29%|##9 | 87/297 [00:34<01:25, 2.46it/s]\n",
|
||
|
" 30%|##9 | 88/297 [00:35<01:27, 2.40it/s]\n",
|
||
|
" 30%|##9 | 89/297 [00:35<01:28, 2.35it/s]\n",
|
||
|
" 30%|### | 90/297 [00:36<01:26, 2.39it/s]\n",
|
||
|
" 31%|### | 91/297 [00:36<01:27, 2.35it/s]\n",
|
||
|
" 31%|### | 92/297 [00:36<01:23, 2.44it/s]\n",
|
||
|
" 31%|###1 | 93/297 [00:37<01:22, 2.48it/s]\n",
|
||
|
" 32%|###1 | 94/297 [00:37<01:24, 2.40it/s]\n",
|
||
|
" 32%|###1 | 95/297 [00:38<01:21, 2.47it/s]\n",
|
||
|
" 32%|###2 | 96/297 [00:38<01:20, 2.50it/s]\n",
|
||
|
" 33%|###2 | 97/297 [00:38<01:21, 2.46it/s]\n",
|
||
|
" 33%|###2 | 98/297 [00:39<01:19, 2.49it/s]\n",
|
||
|
" 33%|###3 | 99/297 [00:39<01:19, 2.49it/s]\n",
|
||
|
" 34%|###3 | 100/297 [00:40<01:16, 2.56it/s]\n",
|
||
|
" 34%|###4 | 101/297 [00:40<01:15, 2.60it/s]\n",
|
||
|
" 34%|###4 | 102/297 [00:40<01:17, 2.52it/s]\n",
|
||
|
" 35%|###4 | 103/297 [00:41<01:21, 2.39it/s]\n",
|
||
|
" 35%|###5 | 104/297 [00:41<01:18, 2.46it/s]\n",
|
||
|
" 35%|###5 | 105/297 [00:42<01:17, 2.47it/s]\n",
|
||
|
" 36%|###5 | 106/297 [00:42<01:14, 2.55it/s]\n",
|
||
|
" 36%|###6 | 107/297 [00:42<01:15, 2.50it/s]\n",
|
||
|
" 36%|###6 | 108/297 [00:43<01:14, 2.53it/s]\n",
|
||
|
" 37%|###6 | 109/297 [00:43<01:14, 2.53it/s]\n",
|
||
|
" 37%|###7 | 110/297 [00:44<01:12, 2.57it/s]\n",
|
||
|
" 37%|###7 | 111/297 [00:44<01:11, 2.59it/s]\n",
|
||
|
" 38%|###7 | 112/297 [00:44<01:11, 2.60it/s]\n",
|
||
|
" 38%|###8 | 113/297 [00:45<01:09, 2.65it/s]\n",
|
||
|
" 38%|###8 | 114/297 [00:45<01:09, 2.64it/s]\n",
|
||
|
" 39%|###8 | 115/297 [00:46<01:12, 2.52it/s]\n",
|
||
|
" 39%|###9 | 116/297 [00:46<01:15, 2.41it/s]\n",
|
||
|
" 39%|###9 | 117/297 [00:46<01:10, 2.55it/s]\n",
|
||
|
" 40%|###9 | 118/297 [00:47<01:09, 2.58it/s]\n",
|
||
|
" 40%|#### | 119/297 [00:47<01:10, 2.52it/s]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" 40%|#### | 120/297 [00:48<01:10, 2.53it/s]\n",
|
||
|
" 41%|#### | 121/297 [00:48<01:08, 2.56it/s]\n",
|
||
|
" 41%|####1 | 122/297 [00:48<01:08, 2.57it/s]\n",
|
||
|
" 41%|####1 | 123/297 [00:49<01:08, 2.55it/s]\n",
|
||
|
" 42%|####1 | 124/297 [00:49<01:11, 2.43it/s]\n",
|
||
|
" 42%|####2 | 125/297 [00:50<01:07, 2.53it/s]\n",
|
||
|
" 42%|####2 | 126/297 [00:50<01:11, 2.38it/s]\n",
|
||
|
" 43%|####2 | 127/297 [00:50<01:09, 2.45it/s]\n",
|
||
|
" 43%|####3 | 128/297 [00:51<01:07, 2.51it/s]\n",
|
||
|
" 43%|####3 | 129/297 [00:51<01:07, 2.49it/s]\n",
|
||
|
" 44%|####3 | 130/297 [00:52<01:07, 2.46it/s]\n",
|
||
|
" 44%|####4 | 131/297 [00:52<01:04, 2.56it/s]\n",
|
||
|
" 44%|####4 | 132/297 [00:52<01:07, 2.43it/s]\n",
|
||
|
" 45%|####4 | 133/297 [00:53<01:06, 2.45it/s]\n",
|
||
|
" 45%|####5 | 134/297 [00:53<01:06, 2.45it/s]\n",
|
||
|
" 45%|####5 | 135/297 [00:54<01:06, 2.43it/s]\n",
|
||
|
" 46%|####5 | 136/297 [00:54<01:02, 2.59it/s]\n",
|
||
|
" 46%|####6 | 137/297 [00:54<01:02, 2.55it/s]\n",
|
||
|
" 46%|####6 | 138/297 [00:55<01:05, 2.44it/s]\n",
|
||
|
" 47%|####6 | 139/297 [00:55<01:02, 2.53it/s]\n",
|
||
|
" 47%|####7 | 140/297 [00:56<01:02, 2.53it/s]\n",
|
||
|
" 47%|####7 | 141/297 [00:56<01:02, 2.51it/s]\n",
|
||
|
" 48%|####7 | 142/297 [00:56<00:59, 2.62it/s]\n",
|
||
|
" 48%|####8 | 143/297 [00:57<00:58, 2.63it/s]\n",
|
||
|
" 48%|####8 | 144/297 [00:57<00:58, 2.63it/s]\n",
|
||
|
" 49%|####8 | 145/297 [00:57<00:59, 2.57it/s]\n",
|
||
|
" 49%|####9 | 146/297 [00:58<00:59, 2.53it/s]\n",
|
||
|
" 49%|####9 | 147/297 [00:58<01:00, 2.48it/s]\n",
|
||
|
" 50%|####9 | 148/297 [00:59<00:58, 2.56it/s]\n",
|
||
|
" 50%|##### | 149/297 [00:59<01:00, 2.44it/s]\n",
|
||
|
" 51%|##### | 150/297 [00:59<00:58, 2.53it/s]\n",
|
||
|
" 51%|##### | 151/297 [01:00<00:57, 2.56it/s]\n",
|
||
|
" 51%|#####1 | 152/297 [01:00<00:56, 2.55it/s]\n",
|
||
|
" 52%|#####1 | 153/297 [01:01<00:56, 2.56it/s]\n",
|
||
|
" 52%|#####1 | 154/297 [01:01<00:56, 2.55it/s]\n",
|
||
|
" 52%|#####2 | 155/297 [01:01<00:55, 2.56it/s]\n",
|
||
|
" 53%|#####2 | 156/297 [01:02<00:55, 2.54it/s]\n",
|
||
|
" 53%|#####2 | 157/297 [01:02<00:54, 2.59it/s]\n",
|
||
|
" 53%|#####3 | 158/297 [01:03<00:54, 2.53it/s]\n",
|
||
|
" 54%|#####3 | 159/297 [01:03<00:53, 2.58it/s]\n",
|
||
|
" 54%|#####3 | 160/297 [01:03<00:52, 2.62it/s]\n",
|
||
|
" 54%|#####4 | 161/297 [01:04<00:52, 2.60it/s]\n",
|
||
|
" 55%|#####4 | 162/297 [01:04<00:51, 2.63it/s]\n",
|
||
|
" 55%|#####4 | 163/297 [01:04<00:51, 2.61it/s]\n",
|
||
|
" 55%|#####5 | 164/297 [01:05<00:51, 2.56it/s]\n",
|
||
|
" 56%|#####5 | 165/297 [01:05<00:51, 2.57it/s]\n",
|
||
|
" 56%|#####5 | 166/297 [01:06<00:51, 2.52it/s]\n",
|
||
|
" 56%|#####6 | 167/297 [01:06<00:51, 2.51it/s]\n",
|
||
|
" 57%|#####6 | 168/297 [01:06<00:51, 2.53it/s]\n",
|
||
|
" 57%|#####6 | 169/297 [01:07<00:51, 2.50it/s]\n",
|
||
|
" 57%|#####7 | 170/297 [01:07<00:51, 2.48it/s]\n",
|
||
|
" 58%|#####7 | 171/297 [01:08<00:51, 2.46it/s]\n",
|
||
|
" 58%|#####7 | 172/297 [01:08<00:50, 2.46it/s]\n",
|
||
|
" 58%|#####8 | 173/297 [01:09<00:50, 2.46it/s]\n",
|
||
|
" 59%|#####8 | 174/297 [01:09<00:48, 2.54it/s]\n",
|
||
|
" 59%|#####8 | 175/297 [01:09<00:46, 2.60it/s]\n",
|
||
|
" 59%|#####9 | 176/297 [01:10<00:46, 2.60it/s]\n",
|
||
|
" 60%|#####9 | 177/297 [01:10<00:45, 2.66it/s]\n",
|
||
|
" 60%|#####9 | 178/297 [01:10<00:47, 2.52it/s]\n",
|
||
|
" 60%|###### | 179/297 [01:11<00:47, 2.49it/s]\n",
|
||
|
" 61%|###### | 180/297 [01:11<00:45, 2.60it/s]\n",
|
||
|
" 61%|###### | 181/297 [01:12<00:44, 2.63it/s]\n",
|
||
|
" 61%|######1 | 182/297 [01:12<00:44, 2.56it/s]\n",
|
||
|
" 62%|######1 | 183/297 [01:12<00:44, 2.56it/s]\n",
|
||
|
" 62%|######1 | 184/297 [01:13<00:43, 2.60it/s]\n",
|
||
|
" 62%|######2 | 185/297 [01:13<00:48, 2.33it/s]\n",
|
||
|
" 63%|######2 | 186/297 [01:14<00:46, 2.40it/s]\n",
|
||
|
" 63%|######2 | 187/297 [01:14<00:44, 2.45it/s]\n",
|
||
|
" 63%|######3 | 188/297 [01:14<00:44, 2.45it/s]\n",
|
||
|
" 64%|######3 | 189/297 [01:15<00:44, 2.41it/s]\n",
|
||
|
" 64%|######3 | 190/297 [01:15<00:43, 2.46it/s]\n",
|
||
|
" 64%|######4 | 191/297 [01:16<00:41, 2.54it/s]\n",
|
||
|
" 65%|######4 | 192/297 [01:16<00:41, 2.52it/s]\n",
|
||
|
" 65%|######4 | 193/297 [01:16<00:41, 2.49it/s]\n",
|
||
|
" 65%|######5 | 194/297 [01:17<00:40, 2.51it/s]\n",
|
||
|
" 66%|######5 | 195/297 [01:17<00:40, 2.54it/s]\n",
|
||
|
" 66%|######5 | 196/297 [01:18<00:40, 2.51it/s]\n",
|
||
|
" 66%|######6 | 197/297 [01:18<00:39, 2.52it/s]\n",
|
||
|
" 67%|######6 | 198/297 [01:18<00:39, 2.50it/s]\n",
|
||
|
" 67%|######7 | 199/297 [01:19<00:39, 2.49it/s]\n",
|
||
|
" 67%|######7 | 200/297 [01:19<00:39, 2.44it/s]\n",
|
||
|
" 68%|######7 | 201/297 [01:20<00:43, 2.23it/s]\n",
|
||
|
" 68%|######8 | 202/297 [01:20<00:40, 2.32it/s]\n",
|
||
|
" 68%|######8 | 203/297 [01:21<00:39, 2.38it/s]\n",
|
||
|
" 69%|######8 | 204/297 [01:21<00:37, 2.45it/s]\n",
|
||
|
" 69%|######9 | 205/297 [01:21<00:37, 2.43it/s]\n",
|
||
|
" 69%|######9 | 206/297 [01:22<00:36, 2.47it/s]\n",
|
||
|
" 70%|######9 | 207/297 [01:22<00:36, 2.44it/s]\n",
|
||
|
" 70%|####### | 208/297 [01:23<00:36, 2.45it/s]\n",
|
||
|
" 70%|####### | 209/297 [01:23<00:36, 2.39it/s]\n",
|
||
|
" 71%|####### | 210/297 [01:23<00:35, 2.43it/s]\n",
|
||
|
" 71%|#######1 | 211/297 [01:24<00:34, 2.53it/s]\n",
|
||
|
" 71%|#######1 | 212/297 [01:24<00:33, 2.56it/s]\n",
|
||
|
" 72%|#######1 | 213/297 [01:25<00:32, 2.61it/s]\n",
|
||
|
" 72%|#######2 | 214/297 [01:25<00:32, 2.52it/s]\n",
|
||
|
" 72%|#######2 | 215/297 [01:25<00:32, 2.54it/s]\n",
|
||
|
" 73%|#######2 | 216/297 [01:26<00:34, 2.36it/s]\n",
|
||
|
" 73%|#######3 | 217/297 [01:26<00:33, 2.37it/s]\n",
|
||
|
" 73%|#######3 | 218/297 [01:27<00:32, 2.42it/s]\n",
|
||
|
" 74%|#######3 | 219/297 [01:27<00:32, 2.40it/s]\n",
|
||
|
" 74%|#######4 | 220/297 [01:28<00:31, 2.42it/s]\n",
|
||
|
" 74%|#######4 | 221/297 [01:28<00:31, 2.44it/s]\n",
|
||
|
" 75%|#######4 | 222/297 [01:28<00:30, 2.45it/s]\n",
|
||
|
" 75%|#######5 | 223/297 [01:29<00:30, 2.45it/s]\n",
|
||
|
" 75%|#######5 | 224/297 [01:29<00:32, 2.26it/s]\n",
|
||
|
" 76%|#######5 | 225/297 [01:30<00:31, 2.30it/s]\n",
|
||
|
" 76%|#######6 | 226/297 [01:30<00:29, 2.43it/s]\n",
|
||
|
" 76%|#######6 | 227/297 [01:30<00:28, 2.48it/s]\n",
|
||
|
" 77%|#######6 | 228/297 [01:31<00:28, 2.43it/s]\n",
|
||
|
" 77%|#######7 | 229/297 [01:31<00:27, 2.48it/s]\n",
|
||
|
" 77%|#######7 | 230/297 [01:32<00:26, 2.56it/s]\n",
|
||
|
" 78%|#######7 | 231/297 [01:32<00:25, 2.55it/s]\n",
|
||
|
" 78%|#######8 | 232/297 [01:32<00:25, 2.55it/s]\n",
|
||
|
" 78%|#######8 | 233/297 [01:33<00:24, 2.60it/s]\n",
|
||
|
" 79%|#######8 | 234/297 [01:33<00:24, 2.57it/s]\n",
|
||
|
" 79%|#######9 | 235/297 [01:34<00:24, 2.54it/s]\n",
|
||
|
" 79%|#######9 | 236/297 [01:34<00:24, 2.44it/s]\n",
|
||
|
" 80%|#######9 | 237/297 [01:34<00:24, 2.50it/s]\n",
|
||
|
" 80%|######## | 238/297 [01:35<00:23, 2.50it/s]\n",
|
||
|
" 80%|######## | 239/297 [01:35<00:22, 2.58it/s]\n",
|
||
|
" 81%|######## | 240/297 [01:35<00:21, 2.68it/s]\n",
|
||
|
" 81%|########1 | 241/297 [01:36<00:21, 2.57it/s]\n",
|
||
|
" 81%|########1 | 242/297 [01:36<00:21, 2.58it/s]\n",
|
||
|
" 82%|########1 | 243/297 [01:37<00:22, 2.44it/s]\n",
|
||
|
" 82%|########2 | 244/297 [01:37<00:21, 2.49it/s]\n",
|
||
|
" 82%|########2 | 245/297 [01:38<00:21, 2.41it/s]\n",
|
||
|
" 83%|########2 | 246/297 [01:38<00:20, 2.49it/s]\n",
|
||
|
" 83%|########3 | 247/297 [01:38<00:19, 2.58it/s]\n",
|
||
|
" 84%|########3 | 248/297 [01:39<00:18, 2.59it/s]\n",
|
||
|
" 84%|########3 | 249/297 [01:39<00:18, 2.66it/s]\n",
|
||
|
" 84%|########4 | 250/297 [01:39<00:17, 2.62it/s]\n",
|
||
|
" 85%|########4 | 251/297 [01:40<00:18, 2.55it/s]\n",
|
||
|
" 85%|########4 | 252/297 [01:40<00:17, 2.58it/s]\n",
|
||
|
" 85%|########5 | 253/297 [01:41<00:16, 2.60it/s]\n",
|
||
|
" 86%|########5 | 254/297 [01:41<00:16, 2.58it/s]\n",
|
||
|
" 86%|########5 | 255/297 [01:41<00:16, 2.60it/s]\n",
|
||
|
" 86%|########6 | 256/297 [01:42<00:15, 2.59it/s]\n",
|
||
|
" 87%|########6 | 257/297 [01:42<00:15, 2.64it/s]\n",
|
||
|
" 87%|########6 | 258/297 [01:43<00:15, 2.56it/s]\n",
|
||
|
" 87%|########7 | 259/297 [01:43<00:15, 2.49it/s]\n",
|
||
|
" 88%|########7 | 260/297 [01:43<00:14, 2.49it/s]\n",
|
||
|
" 88%|########7 | 261/297 [01:44<00:14, 2.49it/s]\n",
|
||
|
" 88%|########8 | 262/297 [01:44<00:14, 2.48it/s]\n",
|
||
|
" 89%|########8 | 263/297 [01:45<00:13, 2.45it/s]\n",
|
||
|
" 89%|########8 | 264/297 [01:45<00:13, 2.48it/s]\n",
|
||
|
" 89%|########9 | 265/297 [01:45<00:13, 2.45it/s]\n",
|
||
|
" 90%|########9 | 266/297 [01:46<00:12, 2.49it/s]\n",
|
||
|
" 90%|########9 | 267/297 [01:46<00:13, 2.30it/s]\n",
|
||
|
" 90%|######### | 268/297 [01:47<00:12, 2.38it/s]\n",
|
||
|
" 91%|######### | 269/297 [01:47<00:11, 2.38it/s]\n",
|
||
|
" 91%|######### | 270/297 [01:48<00:11, 2.33it/s]\n",
|
||
|
" 91%|#########1| 271/297 [01:48<00:10, 2.44it/s]\n",
|
||
|
" 92%|#########1| 272/297 [01:48<00:10, 2.39it/s]\n",
|
||
|
" 92%|#########1| 273/297 [01:49<00:09, 2.44it/s]\n",
|
||
|
" 92%|#########2| 274/297 [01:49<00:09, 2.42it/s]\n",
|
||
|
" 93%|#########2| 275/297 [01:50<00:08, 2.52it/s]\n",
|
||
|
" 93%|#########2| 276/297 [01:50<00:08, 2.44it/s]\n",
|
||
|
" 93%|#########3| 277/297 [01:50<00:08, 2.46it/s]\n",
|
||
|
" 94%|#########3| 278/297 [01:51<00:07, 2.46it/s]\n",
|
||
|
" 94%|#########3| 279/297 [01:51<00:07, 2.35it/s]\n",
|
||
|
" 94%|#########4| 280/297 [01:52<00:07, 2.41it/s]\n",
|
||
|
" 95%|#########4| 281/297 [01:52<00:06, 2.52it/s]\n",
|
||
|
" 95%|#########4| 282/297 [01:52<00:05, 2.53it/s]\n",
|
||
|
" 95%|#########5| 283/297 [01:53<00:05, 2.57it/s]\n",
|
||
|
" 96%|#########5| 284/297 [01:53<00:04, 2.66it/s]\n",
|
||
|
" 96%|#########5| 285/297 [01:54<00:04, 2.58it/s]\n",
|
||
|
" 96%|#########6| 286/297 [01:54<00:04, 2.59it/s]\n",
|
||
|
" 97%|#########6| 287/297 [01:54<00:03, 2.61it/s]\n",
|
||
|
" 97%|#########6| 288/297 [01:55<00:03, 2.64it/s]\n",
|
||
|
" 97%|#########7| 289/297 [01:55<00:03, 2.59it/s]\n",
|
||
|
" 98%|#########7| 290/297 [01:55<00:02, 2.56it/s]\n",
|
||
|
" 98%|#########7| 291/297 [01:56<00:02, 2.52it/s]\n",
|
||
|
" 98%|#########8| 292/297 [01:56<00:01, 2.54it/s]\n",
|
||
|
" 99%|#########8| 293/297 [01:57<00:01, 2.54it/s]\n",
|
||
|
" 99%|#########8| 294/297 [01:57<00:01, 2.48it/s]\n",
|
||
|
" 99%|#########9| 295/297 [01:57<00:00, 2.45it/s]\n",
|
||
|
"100%|#########9| 296/297 [01:58<00:00, 2.43it/s]\n",
|
||
|
"100%|##########| 297/297 [01:58<00:00, 2.87it/s]02/17/2022 17:25:16 - INFO - __main__ - Validation-set | bleu: 6.74998952187005 | accuracy: 1.0\n",
|
||
|
"02/17/2022 17:25:24 - INFO - __main__ - Test-set | bleu: 0.0 | accuracy: 1.0\n",
|
||
|
"Configuration saved in out/tweet/t5_version_2\\config.json\n",
|
||
|
"Model weights saved in out/tweet/t5_version_2\\pytorch_model.bin\n",
|
||
|
"tokenizer config file saved in out/tweet/t5_version_2\\tokenizer_config.json\n",
|
||
|
"Special tokens file saved in out/tweet/t5_version_2\\special_tokens_map.json\n",
|
||
|
"Copy vocab file to out/tweet/t5_version_2\\spiece.model\n",
|
||
|
"\n",
|
||
|
"100%|##########| 297/297 [02:15<00:00, 2.19it/s]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"!python run_translation_no_trainer.py \\\n",
|
||
|
" --model_name_or_path t5-small \\\n",
|
||
|
" --train_file data/translations-train.json \\\n",
|
||
|
" --validation_file data/translations-valid.json \\\n",
|
||
|
" --test_file data/translations-test.json \\\n",
|
||
|
" --per_device_train_batch_size 16 \\\n",
|
||
|
" --per_device_eval_batch_size 16 \\\n",
|
||
|
" --source_prefix \"tweet classification\" \\\n",
|
||
|
" --max_source_length 256 \\\n",
|
||
|
" --max_target_length 128 \\\n",
|
||
|
" --max_length 128 \\\n",
|
||
|
" --num_train_epochs 1 \\\n",
|
||
|
" --freeze_encoder \\\n",
|
||
|
" --output_dir out/tweet/t5_version_2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# EVALUATING MODELS"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Roberta"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 17,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/17/2022 17:22:05 - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
" 0%| | 0/2 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 2/2 [00:00<00:00, 143.26it/s]\n",
|
||
|
"[INFO|configuration_utils.py:586] 2022-02-17 17:22:05,892 >> loading configuration file out/tweet/roberta_version_2\\config.json\n",
|
||
|
"[INFO|configuration_utils.py:625] 2022-02-17 17:22:05,893 >> Model config RobertaConfig {\n",
|
||
|
" \"_name_or_path\": \"roberta-base\",\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
"02/17/2022 17:22:05 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n",
|
||
|
"_n_gpu=0,\n",
|
||
|
"adafactor=False,\n",
|
||
|
"adam_beta1=0.9,\n",
|
||
|
"adam_beta2=0.999,\n",
|
||
|
"adam_epsilon=1e-08,\n",
|
||
|
"dataloader_drop_last=False,\n",
|
||
|
"dataloader_num_workers=0,\n",
|
||
|
"dataloader_pin_memory=True,\n",
|
||
|
"ddp_find_unused_parameters=None,\n",
|
||
|
"debug=[],\n",
|
||
|
"deepspeed=None,\n",
|
||
|
"disable_tqdm=False,\n",
|
||
|
"do_eval=True,\n",
|
||
|
"do_predict=False,\n",
|
||
|
"do_train=False,\n",
|
||
|
"eval_accumulation_steps=None,\n",
|
||
|
"eval_steps=None,\n",
|
||
|
"evaluation_strategy=IntervalStrategy.NO,\n",
|
||
|
"fp16=False,\n",
|
||
|
"fp16_backend=auto,\n",
|
||
|
"fp16_full_eval=False,\n",
|
||
|
"fp16_opt_level=O1,\n",
|
||
|
"gradient_accumulation_steps=1,\n",
|
||
|
"gradient_checkpointing=False,\n",
|
||
|
"greater_is_better=None,\n",
|
||
|
"group_by_length=False,\n",
|
||
|
"hub_model_id=None,\n",
|
||
|
"hub_strategy=HubStrategy.EVERY_SAVE,\n",
|
||
|
"hub_token=<HUB_TOKEN>,\n",
|
||
|
"ignore_data_skip=False,\n",
|
||
|
"label_names=None,\n",
|
||
|
"label_smoothing_factor=0.0,\n",
|
||
|
"learning_rate=5e-05,\n",
|
||
|
"length_column_name=length,\n",
|
||
|
"load_best_model_at_end=False,\n",
|
||
|
"local_rank=-1,\n",
|
||
|
"log_level=-1,\n",
|
||
|
"log_level_replica=-1,\n",
|
||
|
"log_on_each_node=True,\n",
|
||
|
"logging_dir=out/tweet/roberta_version_2-evaluation\\runs\\Feb17_17-22-05_DESKTOP-K706NKK,\n",
|
||
|
"logging_first_step=False,\n",
|
||
|
"logging_nan_inf_filter=True,\n",
|
||
|
"logging_steps=500,\n",
|
||
|
"logging_strategy=IntervalStrategy.STEPS,\n",
|
||
|
"lr_scheduler_type=SchedulerType.LINEAR,\n",
|
||
|
"max_grad_norm=1.0,\n",
|
||
|
"max_steps=-1,\n",
|
||
|
"metric_for_best_model=None,\n",
|
||
|
"mp_parameters=,\n",
|
||
|
"no_cuda=False,\n",
|
||
|
"num_train_epochs=3.0,\n",
|
||
|
"output_dir=out/tweet/roberta_version_2-evaluation,\n",
|
||
|
"overwrite_output_dir=False,\n",
|
||
|
"past_index=-1,\n",
|
||
|
"per_device_eval_batch_size=24,\n",
|
||
|
"per_device_train_batch_size=8,\n",
|
||
|
"prediction_loss_only=False,\n",
|
||
|
"push_to_hub=False,\n",
|
||
|
"push_to_hub_model_id=None,\n",
|
||
|
"push_to_hub_organization=None,\n",
|
||
|
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
|
||
|
"remove_unused_columns=True,\n",
|
||
|
"report_to=[],\n",
|
||
|
"resume_from_checkpoint=None,\n",
|
||
|
"run_name=out/tweet/roberta_version_2-evaluation,\n",
|
||
|
"save_on_each_node=False,\n",
|
||
|
"save_steps=500,\n",
|
||
|
"save_strategy=IntervalStrategy.STEPS,\n",
|
||
|
"save_total_limit=None,\n",
|
||
|
"seed=42,\n",
|
||
|
"sharded_ddp=[],\n",
|
||
|
"skip_memory_metrics=True,\n",
|
||
|
"tpu_metrics_debug=False,\n",
|
||
|
"tpu_num_cores=None,\n",
|
||
|
"use_legacy_prediction_loop=False,\n",
|
||
|
"warmup_ratio=0.0,\n",
|
||
|
"warmup_steps=0,\n",
|
||
|
"weight_decay=0.0,\n",
|
||
|
"xpu_backend=None,\n",
|
||
|
")\n",
|
||
|
"02/17/2022 17:22:05 - INFO - __main__ - load a local file for train: data/train.json\n",
|
||
|
"02/17/2022 17:22:05 - INFO - __main__ - load a local file for validation: data/valid.json\n",
|
||
|
"02/17/2022 17:22:05 - WARNING - datasets.builder - Using custom data configuration default-f2672b914d9c5a33\n",
|
||
|
"02/17/2022 17:22:05 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n",
|
||
|
"02/17/2022 17:22:05 - INFO - datasets.info - Loading Dataset info from C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\n",
|
||
|
"02/17/2022 17:22:05 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"02/17/2022 17:22:05 - INFO - datasets.info - Loading Dataset info from C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\n",
|
||
|
"02/17/2022 17:22:05 - INFO - __main__ - Return hidden states from model: True\n",
|
||
|
"02/17/2022 17:22:05 - INFO - __main__ - Using implementation from: RobertaForSequenceClassificationCustomAlternative\n",
|
||
|
"02/17/2022 17:22:07 - INFO - datasets.arrow_dataset - Caching processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-d1d24efe1f314f1d.arrow\n",
|
||
|
"02/17/2022 17:22:07 - INFO - datasets.arrow_dataset - Caching processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-74073ef035f90484.arrow\n",
|
||
|
"02/17/2022 17:22:08 - INFO - __main__ - *** Evaluate ***\n",
|
||
|
"***** eval metrics *****\n",
|
||
|
" eval_accuracy = 0.938\n",
|
||
|
" eval_loss = 0.673\n",
|
||
|
" eval_runtime = 0:00:46.31\n",
|
||
|
" eval_samples = 500\n",
|
||
|
" eval_samples_per_second = 10.795\n",
|
||
|
" eval_steps_per_second = 0.453\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForSequenceClassification\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"id2label\": {\n",
|
||
|
" \"0\": 0,\n",
|
||
|
" \"1\": 1\n",
|
||
|
" },\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"label2id\": {\n",
|
||
|
" \"0\": 0,\n",
|
||
|
" \"1\": 1\n",
|
||
|
" },\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"problem_type\": \"single_label_classification\",\n",
|
||
|
" \"torch_dtype\": \"float32\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"[INFO|tokenization_utils_base.py:1671] 2022-02-17 17:22:05,900 >> Didn't find file out/tweet/roberta_version_2\\added_tokens.json. We won't load it.\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:22:05,900 >> loading file out/tweet/roberta_version_2\\vocab.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:22:05,900 >> loading file out/tweet/roberta_version_2\\merges.txt\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:22:05,900 >> loading file out/tweet/roberta_version_2\\tokenizer.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:22:05,900 >> loading file None\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:22:05,900 >> loading file out/tweet/roberta_version_2\\special_tokens_map.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:22:05,900 >> loading file out/tweet/roberta_version_2\\tokenizer_config.json\n",
|
||
|
"[INFO|modeling_utils.py:1349] 2022-02-17 17:22:05,959 >> loading weights file out/tweet/roberta_version_2\\pytorch_model.bin\n",
|
||
|
"[WARNING|modeling_utils.py:1609] 2022-02-17 17:22:07,196 >> Some weights of the model checkpoint at out/tweet/roberta_version_2 were not used when initializing RobertaForSequenceClassificationCustomAlternative: ['classifier.dense.weight', 'classifier.dense.bias']\n",
|
||
|
"- This IS expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||
|
"- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
||
|
"[WARNING|modeling_utils.py:1620] 2022-02-17 17:22:07,196 >> Some weights of RobertaForSequenceClassificationCustomAlternative were not initialized from the model checkpoint at out/tweet/roberta_version_2 and are newly initialized: ['classifier.dense_1_hidden.weight', 'classifier.dense_2.weight', 'classifier.dense_1_input.weight', 'classifier.dense_1_hidden.bias', 'classifier.dense_1_input.bias', 'classifier.dense_2.bias']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/5 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 60%|###### | 3/5 [00:00<00:00, 22.77ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 5/5 [00:00<00:00, 26.11ba/s]\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 58.98ba/s]\n",
|
||
|
"[INFO|trainer.py:540] 2022-02-17 17:22:08,390 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: tweet.\n",
|
||
|
"[INFO|trainer.py:2243] 2022-02-17 17:22:08,392 >> ***** Running Evaluation *****\n",
|
||
|
"[INFO|trainer.py:2245] 2022-02-17 17:22:08,392 >> Num examples = 500\n",
|
||
|
"[INFO|trainer.py:2248] 2022-02-17 17:22:08,392 >> Batch size = 24\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/21 [00:00<?, ?it/s]\n",
|
||
|
" 10%|9 | 2/21 [00:02<00:20, 1.09s/it]\n",
|
||
|
" 14%|#4 | 3/21 [00:04<00:27, 1.55s/it]\n",
|
||
|
" 19%|#9 | 4/21 [00:06<00:30, 1.80s/it]\n",
|
||
|
" 24%|##3 | 5/21 [00:08<00:30, 1.94s/it]\n",
|
||
|
" 29%|##8 | 6/21 [00:11<00:30, 2.03s/it]\n",
|
||
|
" 33%|###3 | 7/21 [00:13<00:29, 2.09s/it]\n",
|
||
|
" 38%|###8 | 8/21 [00:15<00:27, 2.13s/it]\n",
|
||
|
" 43%|####2 | 9/21 [00:17<00:25, 2.16s/it]\n",
|
||
|
" 48%|####7 | 10/21 [00:19<00:24, 2.18s/it]\n",
|
||
|
" 52%|#####2 | 11/21 [00:22<00:21, 2.20s/it]\n",
|
||
|
" 57%|#####7 | 12/21 [00:24<00:20, 2.25s/it]\n",
|
||
|
" 62%|######1 | 13/21 [00:26<00:17, 2.25s/it]\n",
|
||
|
" 67%|######6 | 14/21 [00:28<00:15, 2.23s/it]\n",
|
||
|
" 71%|#######1 | 15/21 [00:31<00:13, 2.24s/it]\n",
|
||
|
" 76%|#######6 | 16/21 [00:33<00:11, 2.24s/it]\n",
|
||
|
" 81%|######## | 17/21 [00:35<00:08, 2.22s/it]\n",
|
||
|
" 86%|########5 | 18/21 [00:37<00:06, 2.23s/it]\n",
|
||
|
" 90%|######### | 19/21 [00:40<00:04, 2.21s/it]\n",
|
||
|
" 95%|#########5| 20/21 [00:42<00:02, 2.20s/it]\n",
|
||
|
"100%|##########| 21/21 [00:44<00:00, 2.11s/it]\n",
|
||
|
"100%|##########| 21/21 [00:44<00:00, 2.10s/it]\n",
|
||
|
"[INFO|modelcard.py:449] 2022-02-17 17:22:55,278 >> Dropping the following result as it does not have all the necessary fields:\n",
|
||
|
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}}\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"#valid\n",
|
||
|
"!python run_glue.py \\\n",
|
||
|
"--model_name_or_path out/tweet/roberta_version_2 \\\n",
|
||
|
"--output_dir out/tweet/roberta_version_2-evaluation \\\n",
|
||
|
"--return_hidden_states --custom_model \\\n",
|
||
|
"--train_file data/train.json --validation_file data/valid.json \\\n",
|
||
|
"--do_eval \\\n",
|
||
|
"--per_device_eval_batch_size 24 --max_seq_length 128 \\\n",
|
||
|
"--return_hidden_states --custom_model"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 12,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/16/2022 01:12:34 - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False\n",
|
||
|
"02/16/2022 01:12:34 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n",
|
||
|
"_n_gpu=0,\n",
|
||
|
"adafactor=False,\n",
|
||
|
"adam_beta1=0.9,\n",
|
||
|
"adam_beta2=0.999,\n",
|
||
|
"adam_epsilon=1e-08,\n",
|
||
|
"dataloader_drop_last=False,\n",
|
||
|
"dataloader_num_workers=0,\n",
|
||
|
"dataloader_pin_memory=True,\n",
|
||
|
"ddp_find_unused_parameters=None,"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
" 0%| | 0/2 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 2/2 [00:00<00:00, 167.11it/s]\n",
|
||
|
"[INFO|configuration_utils.py:586] 2022-02-16 01:12:34,776 >> loading configuration file out/tweet/roberta_version_4\\config.json\n",
|
||
|
"[INFO|configuration_utils.py:625] 2022-02-16 01:12:34,776 >> Model config RobertaConfig {\n",
|
||
|
" \"_name_or_path\": \"roberta-base\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"RobertaForSequenceClassificationCustomAlternative\"\n",
|
||
|
" ],\n",
|
||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||
|
" \"bos_token_id\": 0,\n",
|
||
|
" \"classifier_dropout\": null,\n",
|
||
|
" \"eos_token_id\": 2,\n",
|
||
|
" \"hidden_act\": \"gelu\",\n",
|
||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||
|
" \"hidden_size\": 768,\n",
|
||
|
" \"id2label\": {\n",
|
||
|
" \"0\": 0,\n",
|
||
|
" \"1\": 1\n",
|
||
|
" },\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"intermediate_size\": 3072,\n",
|
||
|
" \"label2id\": {\n",
|
||
|
" \"0\": 0,\n",
|
||
|
" \"1\": 1\n",
|
||
|
" },\n",
|
||
|
" \"layer_norm_eps\": 1e-05,\n",
|
||
|
" \"max_position_embeddings\": 514,\n",
|
||
|
" \"model_type\": \"roberta\",\n",
|
||
|
" \"num_attention_heads\": 12,\n",
|
||
|
" \"num_hidden_layers\": 12,\n",
|
||
|
" \"pad_token_id\": 1,\n",
|
||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||
|
" \"problem_type\": \"single_label_classification\",\n",
|
||
|
" \"torch_dtype\": \"float32\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"type_vocab_size\": 1,\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50265\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"[INFO|tokenization_utils_base.py:1671] 2022-02-16 01:12:34,779 >> Didn't find file out/tweet/roberta_version_4\\added_tokens.json. We won't load it.\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-16 01:12:34,779 >> loading file out/tweet/roberta_version_4\\vocab.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-16 01:12:34,779 >> loading file out/tweet/roberta_version_4\\merges.txt\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-16 01:12:34,779 >> loading file out/tweet/roberta_version_4\\tokenizer.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-16 01:12:34,779 >> loading file None\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-16 01:12:34,779 >> loading file out/tweet/roberta_version_4\\special_tokens_map.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-16 01:12:34,780 >> loading file out/tweet/roberta_version_4\\tokenizer_config.json\n",
|
||
|
"[INFO|modeling_utils.py:1349] 2022-02-16 01:12:34,829 >> loading weights file out/tweet/roberta_version_4\\pytorch_model.bin\n",
|
||
|
"[INFO|modeling_utils.py:1618] 2022-02-16 01:12:35,990 >> All model checkpoint weights were used when initializing RobertaForSequenceClassificationCustomAlternative.\n",
|
||
|
"\n",
|
||
|
"[INFO|modeling_utils.py:1626] 2022-02-16 01:12:35,990 >> All the weights of RobertaForSequenceClassificationCustomAlternative were initialized from the model checkpoint at out/tweet/roberta_version_4.\n",
|
||
|
"If your task is similar to the task the model of the checkpoint was trained on, you can already use RobertaForSequenceClassificationCustomAlternative for predictions without further training.\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 26.36ba/s]\n",
|
||
|
"[INFO|trainer.py:540] 2022-02-16 01:12:36,822 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: tweet.\n",
|
||
|
"[INFO|trainer.py:2243] 2022-02-16 01:12:36,823 >> ***** Running Evaluation *****\n",
|
||
|
"[INFO|trainer.py:2245] 2022-02-16 01:12:36,824 >> Num examples = 500\n",
|
||
|
"[INFO|trainer.py:2248] 2022-02-16 01:12:36,824 >> Batch size = 24\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/21 [00:00<?, ?it/s]\n",
|
||
|
" 10%|9 | 2/21 [00:02<00:20, 1.08s/it]\n",
|
||
|
" 14%|#4 | 3/21 [00:04<00:27, 1.52s/it]\n",
|
||
|
" 19%|#9 | 4/21 [00:06<00:29, 1.76s/it]\n",
|
||
|
" 24%|##3 | 5/21 [00:08<00:30, 1.91s/it]\n",
|
||
|
" 29%|##8 | 6/21 [00:10<00:30, 2.00s/it]\n",
|
||
|
" 33%|###3 | 7/21 [00:13<00:29, 2.07s/it]\n",
|
||
|
" 38%|###8 | 8/21 [00:15<00:27, 2.12s/it]\n",
|
||
|
" 43%|####2 | 9/21 [00:17<00:25, 2.14s/it]"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
"debug=[],\n",
|
||
|
"deepspeed=None,\n",
|
||
|
"disable_tqdm=False,\n",
|
||
|
"do_eval=True,\n",
|
||
|
"do_predict=False,\n",
|
||
|
"do_train=False,\n",
|
||
|
"eval_accumulation_steps=None,\n",
|
||
|
"eval_steps=None,\n",
|
||
|
"evaluation_strategy=IntervalStrategy.NO,\n",
|
||
|
"fp16=False,\n",
|
||
|
"fp16_backend=auto,\n",
|
||
|
"fp16_full_eval=False,\n",
|
||
|
"fp16_opt_level=O1,\n",
|
||
|
"gradient_accumulation_steps=1,\n",
|
||
|
"gradient_checkpointing=False,\n",
|
||
|
"greater_is_better=None,\n",
|
||
|
"group_by_length=False,\n",
|
||
|
"hub_model_id=None,\n",
|
||
|
"hub_strategy=HubStrategy.EVERY_SAVE,\n",
|
||
|
"hub_token=<HUB_TOKEN>,\n",
|
||
|
"ignore_data_skip=False,\n",
|
||
|
"label_names=None,\n",
|
||
|
"label_smoothing_factor=0.0,\n",
|
||
|
"learning_rate=5e-05,\n",
|
||
|
"length_column_name=length,\n",
|
||
|
"load_best_model_at_end=False,\n",
|
||
|
"local_rank=-1,\n",
|
||
|
"log_level=-1,\n",
|
||
|
"log_level_replica=-1,\n",
|
||
|
"log_on_each_node=True,\n",
|
||
|
"logging_dir=out/tweet/roberta_version_4-evaluation\\runs\\Feb16_01-12-34_DESKTOP-K706NKK,\n",
|
||
|
"logging_first_step=False,\n",
|
||
|
"logging_nan_inf_filter=True,\n",
|
||
|
"logging_steps=500,\n",
|
||
|
"logging_strategy=IntervalStrategy.STEPS,\n",
|
||
|
"lr_scheduler_type=SchedulerType.LINEAR,\n",
|
||
|
"max_grad_norm=1.0,\n",
|
||
|
"max_steps=-1,\n",
|
||
|
"metric_for_best_model=None,\n",
|
||
|
"mp_parameters=,\n",
|
||
|
"no_cuda=False,\n",
|
||
|
"num_train_epochs=3.0,\n",
|
||
|
"output_dir=out/tweet/roberta_version_4-evaluation,\n",
|
||
|
"overwrite_output_dir=False,\n",
|
||
|
"past_index=-1,\n",
|
||
|
"per_device_eval_batch_size=24,\n",
|
||
|
"per_device_train_batch_size=8,\n",
|
||
|
"prediction_loss_only=False,\n",
|
||
|
"push_to_hub=False,\n",
|
||
|
"push_to_hub_model_id=None,\n",
|
||
|
"push_to_hub_organization=None,\n",
|
||
|
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
|
||
|
"remove_unused_columns=True,\n",
|
||
|
"report_to=[],\n",
|
||
|
"resume_from_checkpoint=None,\n",
|
||
|
"run_name=out/tweet/roberta_version_4-evaluation,\n",
|
||
|
"save_on_each_node=False,\n",
|
||
|
"save_steps=500,\n",
|
||
|
"save_strategy=IntervalStrategy.STEPS,\n",
|
||
|
"save_total_limit=None,\n",
|
||
|
"seed=42,\n",
|
||
|
"sharded_ddp=[],\n",
|
||
|
"skip_memory_metrics=True,\n",
|
||
|
"tpu_metrics_debug=False,\n",
|
||
|
"tpu_num_cores=None,\n",
|
||
|
"use_legacy_prediction_loop=False,\n",
|
||
|
"warmup_ratio=0.0,\n",
|
||
|
"warmup_steps=0,\n",
|
||
|
"weight_decay=0.0,\n",
|
||
|
"xpu_backend=None,\n",
|
||
|
")\n",
|
||
|
"02/16/2022 01:12:34 - INFO - __main__ - load a local file for train: data/train.json\n",
|
||
|
"02/16/2022 01:12:34 - INFO - __main__ - load a local file for validation: data/test.json\n",
|
||
|
"02/16/2022 01:12:34 - WARNING - datasets.builder - Using custom data configuration default-aa408910693fa782\n",
|
||
|
"02/16/2022 01:12:34 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n",
|
||
|
"02/16/2022 01:12:34 - INFO - datasets.info - Loading Dataset info from C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-aa408910693fa782\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\n",
|
||
|
"02/16/2022 01:12:34 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-aa408910693fa782\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"02/16/2022 01:12:34 - INFO - datasets.info - Loading Dataset info from C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-aa408910693fa782\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\n",
|
||
|
"02/16/2022 01:12:34 - INFO - __main__ - Return hidden states from model: True\n",
|
||
|
"02/16/2022 01:12:34 - INFO - __main__ - Using implementation from: RobertaForSequenceClassificationCustomAlternative\n",
|
||
|
"02/16/2022 01:12:36 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-aa408910693fa782\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-7c7dda0a4623bcbe.arrow\n",
|
||
|
"02/16/2022 01:12:36 - INFO - datasets.arrow_dataset - Caching processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-aa408910693fa782\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-eec123a569b1837d.arrow\n",
|
||
|
"02/16/2022 01:12:36 - INFO - __main__ - *** Evaluate ***\n",
|
||
|
"***** eval metrics *****\n",
|
||
|
" eval_accuracy = 1.0\n",
|
||
|
" eval_loss = 0.6472\n",
|
||
|
" eval_runtime = 0:00:45.49\n",
|
||
|
" eval_samples = 500\n",
|
||
|
" eval_samples_per_second = 10.991\n",
|
||
|
" eval_steps_per_second = 0.462\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
" 48%|####7 | 10/21 [00:19<00:23, 2.17s/it]\n",
|
||
|
" 52%|#####2 | 11/21 [00:21<00:21, 2.16s/it]\n",
|
||
|
" 57%|#####7 | 12/21 [00:24<00:19, 2.18s/it]\n",
|
||
|
" 62%|######1 | 13/21 [00:26<00:17, 2.18s/it]\n",
|
||
|
" 67%|######6 | 14/21 [00:28<00:15, 2.18s/it]\n",
|
||
|
" 71%|#######1 | 15/21 [00:30<00:13, 2.17s/it]\n",
|
||
|
" 76%|#######6 | 16/21 [00:32<00:10, 2.18s/it]\n",
|
||
|
" 81%|######## | 17/21 [00:34<00:08, 2.19s/it]\n",
|
||
|
" 86%|########5 | 18/21 [00:37<00:06, 2.19s/it]\n",
|
||
|
" 90%|######### | 19/21 [00:39<00:04, 2.19s/it]\n",
|
||
|
" 95%|#########5| 20/21 [00:41<00:02, 2.17s/it]\n",
|
||
|
"100%|##########| 21/21 [00:43<00:00, 2.06s/it]\n",
|
||
|
"100%|##########| 21/21 [00:43<00:00, 2.06s/it]\n",
|
||
|
"[INFO|modelcard.py:449] 2022-02-16 01:13:22,843 >> Dropping the following result as it does not have all the necessary fields:\n",
|
||
|
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}}\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"#test\n",
|
||
|
"!python run_glue.py \\\n",
|
||
|
"--model_name_or_path out/tweet/roberta_version_4 \\\n",
|
||
|
"--output_dir out/tweet/roberta_version_4-evaluation \\\n",
|
||
|
"--return_hidden_states --custom_model \\\n",
|
||
|
"--train_file data/train.json --validation_file data/test.json \\\n",
|
||
|
"--do_eval \\\n",
|
||
|
"--per_device_eval_batch_size 24 --max_seq_length 128"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# GPT2"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 19,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/17/2022 17:25:29 - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False\n",
|
||
|
"02/17/2022 17:25:29 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n",
|
||
|
"_n_gpu=0,\n",
|
||
|
"adafactor=False,\n",
|
||
|
"adam_beta1=0.9,\n",
|
||
|
"adam_beta2=0.999,\n",
|
||
|
"adam_epsilon=1e-08,\n",
|
||
|
"dataloader_drop_last=False,\n",
|
||
|
"dataloader_num_workers=0,\n",
|
||
|
"dataloader_pin_memory=True,\n",
|
||
|
"ddp_find_unused_parameters=None,\n",
|
||
|
"debug=[],\n",
|
||
|
"deepspeed=None,\n",
|
||
|
"disable_tqdm=False,\n",
|
||
|
"do_eval=True,\n",
|
||
|
"do_predict=False,\n",
|
||
|
"do_train=False,\n",
|
||
|
"eval_accumulation_steps=None,\n",
|
||
|
"eval_steps=None,\n",
|
||
|
"evaluation_strategy=IntervalStrategy.NO,\n",
|
||
|
"fp16=False,\n",
|
||
|
"fp16_backend=auto,\n",
|
||
|
"fp16_full_eval=False,\n",
|
||
|
"fp16_opt_level=O1,\n",
|
||
|
"gradient_accumulation_steps=1,\n",
|
||
|
"gradient_checkpointing=False,\n",
|
||
|
"greater_is_better=None,\n",
|
||
|
"group_by_length=False,\n",
|
||
|
"hub_model_id=None,\n",
|
||
|
"hub_strategy=HubStrategy.EVERY_SAVE,\n",
|
||
|
"hub_token=<HUB_TOKEN>,\n",
|
||
|
"ignore_data_skip=False,\n",
|
||
|
"label_names=None,\n",
|
||
|
"label_smoothing_factor=0.0,\n",
|
||
|
"learning_rate=5e-05,\n",
|
||
|
"length_column_name=length,\n",
|
||
|
"load_best_model_at_end=False,\n",
|
||
|
"local_rank=-1,\n",
|
||
|
"log_level=-1,\n",
|
||
|
"log_level_replica=-1,\n",
|
||
|
"log_on_each_node=True,\n",
|
||
|
"logging_dir=out/tweet/gpt2_version_2-evaluation\\runs\\Feb17_17-25-29_DESKTOP-K706NKK,\n",
|
||
|
"logging_first_step=False,\n",
|
||
|
"logging_nan_inf_filter=True,\n",
|
||
|
"logging_steps=500,\n",
|
||
|
"logging_strategy=IntervalStrategy.STEPS,\n",
|
||
|
"lr_scheduler_type=SchedulerType.LINEAR,\n",
|
||
|
"max_grad_norm=1.0,\n",
|
||
|
"max_steps=-1,\n",
|
||
|
"metric_for_best_model=None,\n",
|
||
|
"mp_parameters=,\n",
|
||
|
"no_cuda=False,\n",
|
||
|
"num_train_epochs=3.0,\n",
|
||
|
"output_dir=out/tweet/gpt2_version_2-evaluation,\n",
|
||
|
"overwrite_output_dir=False,\n",
|
||
|
"past_index=-1,\n",
|
||
|
"per_device_eval_batch_size=24,\n",
|
||
|
"per_device_train_batch_size=8,\n",
|
||
|
"prediction_loss_only=False,\n",
|
||
|
"push_to_hub=False,\n",
|
||
|
"push_to_hub_model_id=None,\n",
|
||
|
"push_to_hub_organization=None,\n",
|
||
|
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
|
||
|
"remove_unused_columns=True,\n",
|
||
|
"report_to=[],\n",
|
||
|
"resume_from_checkpoint=None,\n",
|
||
|
"run_name=out/tweet/gpt2_version_2-evaluation,\n",
|
||
|
"save_on_each_node=False,\n",
|
||
|
"save_steps=500,\n",
|
||
|
"save_strategy=IntervalStrategy.STEPS,\n",
|
||
|
"save_total_limit=None,\n",
|
||
|
"seed=42,\n",
|
||
|
"sharded_ddp=[],\n",
|
||
|
"skip_memory_metrics=True,\n",
|
||
|
"tpu_metrics_debug=False,\n",
|
||
|
"tpu_num_cores=None,\n",
|
||
|
"use_legacy_prediction_loop=False,\n",
|
||
|
"warmup_ratio=0.0,\n",
|
||
|
"warmup_steps=0,\n",
|
||
|
"weight_decay=0.0,\n",
|
||
|
"xpu_backend=None,\n",
|
||
|
")\n",
|
||
|
"02/17/2022 17:25:29 - INFO - __main__ - load a local file for train: data/train.json\n",
|
||
|
"02/17/2022 17:25:29 - INFO - __main__ - load a local file for validation: data/valid.json\n",
|
||
|
"02/17/2022 17:25:29 - WARNING - datasets.builder - Using custom data configuration default-f2672b914d9c5a33\n",
|
||
|
"02/17/2022 17:25:29 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n",
|
||
|
"02/17/2022 17:25:29 - INFO - datasets.info - Loading Dataset info from C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\n",
|
||
|
"02/17/2022 17:25:29 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"02/17/2022 17:25:29 - INFO - datasets.info - Loading Dataset info from C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\n",
|
||
|
"02/17/2022 17:25:29 - INFO - __main__ - Return hidden states from model: True\n",
|
||
|
"02/17/2022 17:25:29 - INFO - __main__ - Using implementation from: GPT2ForSequenceClassificationCustom\n",
|
||
|
"02/17/2022 17:25:31 - INFO - datasets.arrow_dataset - Caching processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-212f78cac2ca92a1.arrow\n",
|
||
|
"02/17/2022 17:25:31 - INFO - datasets.arrow_dataset - Caching processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-f2672b914d9c5a33\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-95c22eb06b0faad8.arrow\n",
|
||
|
"02/17/2022 17:25:32 - INFO - __main__ - *** Evaluate ***\n",
|
||
|
"***** eval metrics *****\n",
|
||
|
" eval_accuracy = 0.938\n",
|
||
|
" eval_loss = 0.4886\n",
|
||
|
" eval_runtime = 0:01:01.53\n",
|
||
|
" eval_samples = 500\n",
|
||
|
" eval_samples_per_second = 8.126\n",
|
||
|
" eval_steps_per_second = 0.341\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
" 0%| | 0/2 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 2/2 [00:00<00:00, 2018.43it/s]\n",
|
||
|
"[INFO|configuration_utils.py:586] 2022-02-17 17:25:29,863 >> loading configuration file out/tweet/gpt2_version_2\\config.json\n",
|
||
|
"[INFO|configuration_utils.py:625] 2022-02-17 17:25:29,864 >> Model config GPT2Config {\n",
|
||
|
" \"_name_or_path\": \"gpt2\",\n",
|
||
|
" \"activation_function\": \"gelu_new\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"GPT2ForSequenceClassification\"\n",
|
||
|
" ],\n",
|
||
|
" \"attn_pdrop\": 0.1,\n",
|
||
|
" \"bos_token_id\": 50256,\n",
|
||
|
" \"embd_pdrop\": 0.1,\n",
|
||
|
" \"eos_token_id\": 50256,\n",
|
||
|
" \"id2label\": {\n",
|
||
|
" \"0\": 0,\n",
|
||
|
" \"1\": 1\n",
|
||
|
" },\n",
|
||
|
" \"initializer_range\": 0.02,\n",
|
||
|
" \"label2id\": {\n",
|
||
|
" \"0\": 0,\n",
|
||
|
" \"1\": 1\n",
|
||
|
" },\n",
|
||
|
" \"layer_norm_epsilon\": 1e-05,\n",
|
||
|
" \"model_type\": \"gpt2\",\n",
|
||
|
" \"n_ctx\": 1024,\n",
|
||
|
" \"n_embd\": 768,\n",
|
||
|
" \"n_head\": 12,\n",
|
||
|
" \"n_inner\": null,\n",
|
||
|
" \"n_layer\": 12,\n",
|
||
|
" \"n_positions\": 1024,\n",
|
||
|
" \"pad_token_id\": 50256,\n",
|
||
|
" \"reorder_and_upcast_attn\": false,\n",
|
||
|
" \"resid_pdrop\": 0.1,\n",
|
||
|
" \"scale_attn_by_inverse_layer_idx\": false,\n",
|
||
|
" \"scale_attn_weights\": true,\n",
|
||
|
" \"summary_activation\": null,\n",
|
||
|
" \"summary_first_dropout\": 0.1,\n",
|
||
|
" \"summary_proj_to_labels\": true,\n",
|
||
|
" \"summary_type\": \"cls_index\",\n",
|
||
|
" \"summary_use_proj\": true,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"text-generation\": {\n",
|
||
|
" \"do_sample\": true,\n",
|
||
|
" \"max_length\": 50\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"torch_dtype\": \"float32\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 50257\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"[INFO|tokenization_utils_base.py:1671] 2022-02-17 17:25:29,868 >> Didn't find file out/tweet/gpt2_version_2\\added_tokens.json. We won't load it.\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:25:29,869 >> loading file out/tweet/gpt2_version_2\\vocab.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:25:29,869 >> loading file out/tweet/gpt2_version_2\\merges.txt\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:25:29,869 >> loading file out/tweet/gpt2_version_2\\tokenizer.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:25:29,869 >> loading file None\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:25:29,869 >> loading file out/tweet/gpt2_version_2\\special_tokens_map.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:25:29,869 >> loading file out/tweet/gpt2_version_2\\tokenizer_config.json\n",
|
||
|
"[INFO|modeling_utils.py:1349] 2022-02-17 17:25:29,927 >> loading weights file out/tweet/gpt2_version_2\\pytorch_model.bin\n",
|
||
|
"[WARNING|modeling_utils.py:1609] 2022-02-17 17:25:31,677 >> Some weights of the model checkpoint at out/tweet/gpt2_version_2 were not used when initializing GPT2ForSequenceClassificationCustom: ['score.weight']\n",
|
||
|
"- This IS expected if you are initializing GPT2ForSequenceClassificationCustom from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
|
||
|
"- This IS NOT expected if you are initializing GPT2ForSequenceClassificationCustom from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
|
||
|
"[WARNING|modeling_utils.py:1620] 2022-02-17 17:25:31,677 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at out/tweet/gpt2_version_2 and are newly initialized: ['score.out_proj.weight', 'score.dense_1_input.bias', 'score.dense_1_hidden.bias', 'score.dense_2.weight', 'score.dense_2.bias', 'score.dense_1_hidden.weight', 'score.dense_1_input.weight']\n",
|
||
|
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/5 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 40%|#### | 2/5 [00:00<00:00, 18.16ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 5/5 [00:00<00:00, 25.52ba/s]\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on dataset: 100%|##########| 1/1 [00:00<00:00, 58.98ba/s]\n",
|
||
|
"[INFO|trainer.py:540] 2022-02-17 17:25:32,736 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: tweet.\n",
|
||
|
"[INFO|trainer.py:2243] 2022-02-17 17:25:32,737 >> ***** Running Evaluation *****\n",
|
||
|
"[INFO|trainer.py:2245] 2022-02-17 17:25:32,737 >> Num examples = 500\n",
|
||
|
"[INFO|trainer.py:2248] 2022-02-17 17:25:32,737 >> Batch size = 24\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/21 [00:00<?, ?it/s]\n",
|
||
|
" 10%|9 | 2/21 [00:02<00:28, 1.48s/it]\n",
|
||
|
" 14%|#4 | 3/21 [00:05<00:37, 2.08s/it]\n",
|
||
|
" 19%|#9 | 4/21 [00:08<00:40, 2.40s/it]\n",
|
||
|
" 24%|##3 | 5/21 [00:11<00:41, 2.60s/it]\n",
|
||
|
" 29%|##8 | 6/21 [00:14<00:40, 2.70s/it]\n",
|
||
|
" 33%|###3 | 7/21 [00:17<00:38, 2.77s/it]\n",
|
||
|
" 38%|###8 | 8/21 [00:20<00:36, 2.81s/it]\n",
|
||
|
" 43%|####2 | 9/21 [00:23<00:34, 2.86s/it]\n",
|
||
|
" 48%|####7 | 10/21 [00:26<00:31, 2.89s/it]\n",
|
||
|
" 52%|#####2 | 11/21 [00:29<00:29, 2.90s/it]\n",
|
||
|
" 57%|#####7 | 12/21 [00:32<00:26, 2.91s/it]\n",
|
||
|
" 62%|######1 | 13/21 [00:35<00:23, 2.94s/it]\n",
|
||
|
" 67%|######6 | 14/21 [00:38<00:20, 2.96s/it]\n",
|
||
|
" 71%|#######1 | 15/21 [00:41<00:17, 2.97s/it]\n",
|
||
|
" 76%|#######6 | 16/21 [00:44<00:14, 2.95s/it]\n",
|
||
|
" 81%|######## | 17/21 [00:47<00:11, 2.95s/it]\n",
|
||
|
" 86%|########5 | 18/21 [00:50<00:08, 2.96s/it]\n",
|
||
|
" 90%|######### | 19/21 [00:53<00:05, 2.96s/it]\n",
|
||
|
" 95%|#########5| 20/21 [00:56<00:02, 2.97s/it]\n",
|
||
|
"100%|##########| 21/21 [00:58<00:00, 2.83s/it]\n",
|
||
|
"100%|##########| 21/21 [00:58<00:00, 2.79s/it]\n",
|
||
|
"[INFO|modelcard.py:449] 2022-02-17 17:26:34,864 >> Dropping the following result as it does not have all the necessary fields:\n",
|
||
|
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}}\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"#valid\n",
|
||
|
"!python run_glue.py \\\n",
|
||
|
"--model_name_or_path out/tweet/gpt2_version_2 \\\n",
|
||
|
"--output_dir out/tweet/gpt2_version_2-evaluation \\\n",
|
||
|
"--return_hidden_states --custom_model \\\n",
|
||
|
"--train_file data/train.json --validation_file data/valid.json \\\n",
|
||
|
"--do_eval \\\n",
|
||
|
"--per_device_eval_batch_size 24 --max_seq_length 128"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# T5"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 26,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"02/17/2022 17:36:52 - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False\n",
|
||
|
"02/17/2022 17:36:52 - INFO - __main__ - Training/evaluation parameters Seq2SeqTrainingArguments(\n",
|
||
|
"_n_gpu=0,\n",
|
||
|
"adafactor=False,\n",
|
||
|
"adam_beta1=0.9,\n",
|
||
|
"adam_beta2=0.999,\n",
|
||
|
"adam_epsilon=1e-08,\n",
|
||
|
"dataloader_drop_last=False,\n",
|
||
|
"dataloader_num_workers=0,\n",
|
||
|
"dataloader_pin_memory=True,\n",
|
||
|
"ddp_find_unused_parameters=None,\n",
|
||
|
"debug=[],\n",
|
||
|
"deepspeed=None,\n",
|
||
|
"disable_tqdm=False,\n",
|
||
|
"do_eval=True,\n",
|
||
|
"do_predict=False,\n",
|
||
|
"do_train=False,\n",
|
||
|
"eval_accumulation_steps=None,\n",
|
||
|
"eval_steps=None,\n",
|
||
|
"evaluation_strategy=IntervalStrategy.NO,\n",
|
||
|
"fp16=False,\n",
|
||
|
"fp16_backend=auto,\n",
|
||
|
"fp16_full_eval=False,\n",
|
||
|
"fp16_opt_level=O1,\n",
|
||
|
"generation_max_length=None,\n",
|
||
|
"generation_num_beams=None,\n",
|
||
|
"gradient_accumulation_steps=1,\n",
|
||
|
"gradient_checkpointing=False,\n",
|
||
|
"greater_is_better=None,\n",
|
||
|
"group_by_length=False,\n",
|
||
|
"hub_model_id=None,\n",
|
||
|
"hub_strategy=HubStrategy.EVERY_SAVE,\n",
|
||
|
"hub_token=<HUB_TOKEN>,\n",
|
||
|
"ignore_data_skip=False,\n",
|
||
|
"label_names=None,\n",
|
||
|
"label_smoothing_factor=0.0,\n",
|
||
|
"learning_rate=5e-05,\n",
|
||
|
"length_column_name=length,\n",
|
||
|
"load_best_model_at_end=False,\n",
|
||
|
"local_rank=-1,\n",
|
||
|
"log_level=-1,\n",
|
||
|
"log_level_replica=-1,\n",
|
||
|
"log_on_each_node=True,\n",
|
||
|
"logging_dir=out/tweet/t5-evaluation\\runs\\Feb17_17-36-52_DESKTOP-K706NKK,\n",
|
||
|
"logging_first_step=False,\n",
|
||
|
"logging_nan_inf_filter=True,\n",
|
||
|
"logging_steps=500,\n",
|
||
|
"logging_strategy=IntervalStrategy.STEPS,\n",
|
||
|
"lr_scheduler_type=SchedulerType.LINEAR,\n",
|
||
|
"max_grad_norm=1.0,\n",
|
||
|
"max_steps=-1,\n",
|
||
|
"metric_for_best_model=None,\n",
|
||
|
"mp_parameters=,\n",
|
||
|
"no_cuda=False,\n",
|
||
|
"num_train_epochs=3.0,\n",
|
||
|
"output_dir=out/tweet/t5-evaluation,\n",
|
||
|
"overwrite_output_dir=False,\n",
|
||
|
"past_index=-1,\n",
|
||
|
"per_device_eval_batch_size=16,\n",
|
||
|
"per_device_train_batch_size=8,\n",
|
||
|
"predict_with_generate=True,\n",
|
||
|
"prediction_loss_only=False,\n",
|
||
|
"push_to_hub=False,\n",
|
||
|
"push_to_hub_model_id=None,\n",
|
||
|
"push_to_hub_organization=None,\n",
|
||
|
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
|
||
|
"remove_unused_columns=True,\n",
|
||
|
"report_to=[],\n",
|
||
|
"resume_from_checkpoint=None,\n",
|
||
|
"run_name=out/tweet/t5-evaluation,\n",
|
||
|
"save_on_each_node=False,\n",
|
||
|
"save_steps=500,\n",
|
||
|
"save_strategy=IntervalStrategy.STEPS,\n",
|
||
|
"save_total_limit=None,\n",
|
||
|
"seed=42,\n",
|
||
|
"sharded_ddp=[],\n",
|
||
|
"skip_memory_metrics=True,\n",
|
||
|
"sortish_sampler=False,\n",
|
||
|
"tpu_metrics_debug=False,\n",
|
||
|
"tpu_num_cores=None,\n",
|
||
|
"use_legacy_prediction_loop=False,\n",
|
||
|
"warmup_ratio=0.0,\n",
|
||
|
"warmup_steps=0,\n",
|
||
|
"weight_decay=0.0,\n",
|
||
|
"xpu_backend=None,\n",
|
||
|
")\n",
|
||
|
"02/17/2022 17:36:52 - WARNING - datasets.builder - Using custom data configuration default-6d5bc754bbaa91d7\n",
|
||
|
"02/17/2022 17:36:52 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n",
|
||
|
"02/17/2022 17:36:52 - INFO - datasets.info - Loading Dataset info from C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-6d5bc754bbaa91d7\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\n",
|
||
|
"02/17/2022 17:36:52 - WARNING - datasets.builder - Reusing dataset json (C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-6d5bc754bbaa91d7\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426)\n",
|
||
|
"02/17/2022 17:36:52 - INFO - datasets.info - Loading Dataset info from C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-6d5bc754bbaa91d7\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\n",
|
||
|
"02/17/2022 17:36:53 - INFO - __main__ - Using translation prefix: \"tweet classification: \"\n",
|
||
|
"02/17/2022 17:36:53 - INFO - datasets.arrow_dataset - Caching processed dataset at C:\\Users\\Foka\\.cache\\huggingface\\datasets\\json\\default-6d5bc754bbaa91d7\\0.0.0\\c2d554c3377ea79c7664b93dc65d0803b45e3279000f993c7bfd18937fd7f426\\cache-96f3d337ad66e082.arrow\n",
|
||
|
"02/17/2022 17:36:55 - INFO - __main__ - *** Evaluate ***\n",
|
||
|
"02/17/2022 17:37:09 - INFO - datasets.metric - Removing C:\\Users\\Foka\\.cache\\huggingface\\metrics\\accuracy\\default\\default_experiment-1-0.arrow\n",
|
||
|
"02/17/2022 17:37:09 - INFO - datasets.metric - Removing C:\\Users\\Foka\\.cache\\huggingface\\metrics\\sacrebleu\\default\\default_experiment-1-0.arrow\n",
|
||
|
"***** eval metrics *****\n",
|
||
|
" eval_accuracy = 1.0\n",
|
||
|
" eval_bleu = 0.0\n",
|
||
|
" eval_gen_len = 2.272\n",
|
||
|
" eval_loss = 0.5538\n",
|
||
|
" eval_runtime = 0:00:14.42\n",
|
||
|
" eval_samples = 500\n",
|
||
|
" eval_samples_per_second = 34.659\n",
|
||
|
" eval_steps_per_second = 2.218\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
" 0%| | 0/2 [00:00<?, ?it/s]\n",
|
||
|
"100%|##########| 2/2 [00:00<00:00, 2020.86it/s]\n",
|
||
|
"[INFO|configuration_utils.py:586] 2022-02-17 17:36:52,675 >> loading configuration file out/tweet/t5_version_2\\config.json\n",
|
||
|
"[INFO|configuration_utils.py:625] 2022-02-17 17:36:52,677 >> Model config T5Config {\n",
|
||
|
" \"_name_or_path\": \"t5-small\",\n",
|
||
|
" \"architectures\": [\n",
|
||
|
" \"T5ForConditionalGeneration\"\n",
|
||
|
" ],\n",
|
||
|
" \"d_ff\": 2048,\n",
|
||
|
" \"d_kv\": 64,\n",
|
||
|
" \"d_model\": 512,\n",
|
||
|
" \"decoder_start_token_id\": 0,\n",
|
||
|
" \"dropout_rate\": 0.1,\n",
|
||
|
" \"eos_token_id\": 1,\n",
|
||
|
" \"feed_forward_proj\": \"relu\",\n",
|
||
|
" \"initializer_factor\": 1.0,\n",
|
||
|
" \"is_encoder_decoder\": true,\n",
|
||
|
" \"layer_norm_epsilon\": 1e-06,\n",
|
||
|
" \"model_type\": \"t5\",\n",
|
||
|
" \"n_positions\": 512,\n",
|
||
|
" \"num_decoder_layers\": 6,\n",
|
||
|
" \"num_heads\": 8,\n",
|
||
|
" \"num_layers\": 6,\n",
|
||
|
" \"output_past\": true,\n",
|
||
|
" \"pad_token_id\": 0,\n",
|
||
|
" \"relative_attention_num_buckets\": 32,\n",
|
||
|
" \"task_specific_params\": {\n",
|
||
|
" \"summarization\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"length_penalty\": 2.0,\n",
|
||
|
" \"max_length\": 200,\n",
|
||
|
" \"min_length\": 30,\n",
|
||
|
" \"no_repeat_ngram_size\": 3,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"summarize: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_de\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to German: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_fr\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to French: \"\n",
|
||
|
" },\n",
|
||
|
" \"translation_en_to_ro\": {\n",
|
||
|
" \"early_stopping\": true,\n",
|
||
|
" \"max_length\": 300,\n",
|
||
|
" \"num_beams\": 4,\n",
|
||
|
" \"prefix\": \"translate English to Romanian: \"\n",
|
||
|
" }\n",
|
||
|
" },\n",
|
||
|
" \"torch_dtype\": \"float32\",\n",
|
||
|
" \"transformers_version\": \"4.12.5\",\n",
|
||
|
" \"use_cache\": true,\n",
|
||
|
" \"vocab_size\": 32100\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"[INFO|tokenization_utils_base.py:1671] 2022-02-17 17:36:52,677 >> Didn't find file out/tweet/t5_version_2\\added_tokens.json. We won't load it.\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:36:52,677 >> loading file out/tweet/t5_version_2\\spiece.model\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:36:52,677 >> loading file out/tweet/t5_version_2\\tokenizer.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:36:52,677 >> loading file None\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:36:52,677 >> loading file out/tweet/t5_version_2\\special_tokens_map.json\n",
|
||
|
"[INFO|tokenization_utils_base.py:1740] 2022-02-17 17:36:52,677 >> loading file out/tweet/t5_version_2\\tokenizer_config.json\n",
|
||
|
"[INFO|modeling_utils.py:1349] 2022-02-17 17:36:52,771 >> loading weights file out/tweet/t5_version_2\\pytorch_model.bin\n",
|
||
|
"[INFO|modeling_utils.py:1618] 2022-02-17 17:36:53,190 >> All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
|
||
|
"\n",
|
||
|
"[INFO|modeling_utils.py:1626] 2022-02-17 17:36:53,190 >> All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at out/tweet/t5_version_2.\n",
|
||
|
"If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
|
||
|
"\n",
|
||
|
"Running tokenizer on validation dataset: 0%| | 0/1 [00:00<?, ?ba/s]\n",
|
||
|
"Running tokenizer on validation dataset: 100%|##########| 1/1 [00:00<00:00, 34.57ba/s]\n",
|
||
|
"[INFO|trainer.py:2243] 2022-02-17 17:36:55,016 >> ***** Running Evaluation *****\n",
|
||
|
"[INFO|trainer.py:2245] 2022-02-17 17:36:55,016 >> Num examples = 500\n",
|
||
|
"[INFO|trainer.py:2248] 2022-02-17 17:36:55,016 >> Batch size = 16\n",
|
||
|
"\n",
|
||
|
" 0%| | 0/32 [00:00<?, ?it/s]\n",
|
||
|
" 6%|6 | 2/32 [00:00<00:06, 4.31it/s]\n",
|
||
|
" 9%|9 | 3/32 [00:01<00:10, 2.75it/s]\n",
|
||
|
" 12%|#2 | 4/32 [00:01<00:10, 2.57it/s]\n",
|
||
|
" 16%|#5 | 5/32 [00:01<00:11, 2.43it/s]\n",
|
||
|
" 19%|#8 | 6/32 [00:02<00:11, 2.32it/s]\n",
|
||
|
" 22%|##1 | 7/32 [00:02<00:11, 2.14it/s]\n",
|
||
|
" 25%|##5 | 8/32 [00:03<00:11, 2.11it/s]\n",
|
||
|
" 28%|##8 | 9/32 [00:03<00:10, 2.12it/s]\n",
|
||
|
" 31%|###1 | 10/32 [00:04<00:09, 2.20it/s]\n",
|
||
|
" 34%|###4 | 11/32 [00:04<00:09, 2.33it/s]\n",
|
||
|
" 38%|###7 | 12/32 [00:05<00:08, 2.26it/s]\n",
|
||
|
" 41%|#### | 13/32 [00:05<00:08, 2.23it/s]\n",
|
||
|
" 44%|####3 | 14/32 [00:06<00:08, 2.23it/s]\n",
|
||
|
" 47%|####6 | 15/32 [00:06<00:07, 2.26it/s]\n",
|
||
|
" 50%|##### | 16/32 [00:06<00:07, 2.25it/s]\n",
|
||
|
" 53%|#####3 | 17/32 [00:07<00:07, 2.09it/s]\n",
|
||
|
" 56%|#####6 | 18/32 [00:07<00:06, 2.15it/s]\n",
|
||
|
" 59%|#####9 | 19/32 [00:08<00:05, 2.21it/s]\n",
|
||
|
" 62%|######2 | 20/32 [00:08<00:05, 2.26it/s]\n",
|
||
|
" 66%|######5 | 21/32 [00:09<00:05, 2.16it/s]\n",
|
||
|
" 69%|######8 | 22/32 [00:09<00:04, 2.05it/s]\n",
|
||
|
" 72%|#######1 | 23/32 [00:10<00:04, 2.14it/s]\n",
|
||
|
" 75%|#######5 | 24/32 [00:10<00:03, 2.11it/s]\n",
|
||
|
" 78%|#######8 | 25/32 [00:11<00:03, 2.23it/s]\n",
|
||
|
" 81%|########1 | 26/32 [00:11<00:02, 2.14it/s]\n",
|
||
|
" 84%|########4 | 27/32 [00:12<00:02, 2.25it/s]\n",
|
||
|
" 88%|########7 | 28/32 [00:12<00:01, 2.12it/s]\n",
|
||
|
" 91%|######### | 29/32 [00:12<00:01, 2.23it/s]\n",
|
||
|
" 94%|#########3| 30/32 [00:13<00:00, 2.27it/s]\n",
|
||
|
" 97%|#########6| 31/32 [00:13<00:00, 2.34it/s]\n",
|
||
|
"100%|##########| 32/32 [00:13<00:00, 2.98it/s]\n",
|
||
|
"100%|##########| 32/32 [00:13<00:00, 2.30it/s]\n",
|
||
|
"[INFO|modelcard.py:449] 2022-02-17 17:37:10,066 >> Dropping the following result as it does not have all the necessary fields:\n",
|
||
|
"{'task': {'name': 'Translation', 'type': 'translation'}}\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"#test\n",
|
||
|
"!python run_translation.py \\\n",
|
||
|
"--model_name_or_path out/tweet/t5_version_2 \\\n",
|
||
|
"--output_dir out/tweet/t5-evaluation \\\n",
|
||
|
"--train_file data/translations-train.json \\\n",
|
||
|
"--validation_file data/translations-test.json \\\n",
|
||
|
"--do_eval \\\n",
|
||
|
"--per_device_eval_batch_size 16 \\\n",
|
||
|
"--source_lang text \\\n",
|
||
|
"--target_lang label \\\n",
|
||
|
"--source_prefix \"tweet classification\" \\\n",
|
||
|
"--max_source_length 256 \\\n",
|
||
|
"--max_target_length 128 \\\n",
|
||
|
"--predict_with_generate"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.8.9"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 1
|
||
|
}
|