{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "svk2qSrl7ICc" }, "source": [ "# **Uczenie Głębokie - projekt**\n", "W projekcie wykorzystano dataset [emotion](https://huggingface.co/datasets/emotion), zawierający wpisy nacechowane określonymi emocjami.\n", "\n", "
\n", "\n", "Labels:\n", "- 0 - sadness\n", "- 1 - joy\n", "- 2 - love\n", "- 3 - anger\n", "- 4 - fear\n", "- 5 - surprise" ] }, { "cell_type": "markdown", "metadata": { "id": "wJ30OIAM7ICf" }, "source": [ "### **REQUIREMENTS**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XkE5ENXV7ICf", "outputId": "68ec24ee-8dcd-48b7-c0ce-3d18c1b9bcd6" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: transformers in /usr/local/lib/python3.8/dist-packages (4.23.1)\n", "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.2.1)\n", "Requirement already satisfied: accelerate in /usr/local/lib/python3.8/dist-packages (0.16.0)\n", "Requirement already satisfied: evaluate in /usr/local/lib/python3.8/dist-packages (0.4.0)\n", "Requirement already satisfied: datasets in /usr/local/lib/python3.8/dist-packages (2.9.0)\n", "Requirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (1.13.1)\n", "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.8/dist-packages (0.1.97)\n", "Requirement already satisfied: torchvision in /usr/local/lib/python3.8/dist-packages (0.14.1+cu116)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers) (4.64.1)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (1.21.6)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.12.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n", "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.13.2)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers) (2.25.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (6.0)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (23.0)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n", "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n", "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.8/dist-packages (from accelerate) (5.4.8)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.3.6)\n", "Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.18.0)\n", "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.8/dist-packages (from evaluate) (2023.1.0)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from evaluate) (3.2.0)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from evaluate) (1.3.5)\n", "Requirement 
already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.70.14)\n", "Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n", "Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /usr/local/lib/python3.8/dist-packages (from torch) (11.10.3.66)\n", "Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /usr/local/lib/python3.8/dist-packages (from torch) (11.7.99)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch) (4.4.0)\n", "Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /usr/local/lib/python3.8/dist-packages (from torch) (11.7.99)\n", "Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /usr/local/lib/python3.8/dist-packages (from torch) (8.5.0.96)\n", "Requirement already satisfied: wheel in /usr/local/lib/python3.8/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (0.38.4)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (57.4.0)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.8/dist-packages (from torchvision) (7.1.2)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n", "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2022.12.7)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (1.26.14)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2.10)\n", "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (4.0.0)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2022.7.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->evaluate) (1.15.0)\n" ] } ], "source": [ "!pip3 install transformers scikit-learn accelerate evaluate datasets torch sentencepiece torchvision" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "MrV5G1gW7ICg" }, "outputs": [], "source": [ "import os\n", "import json\n", "from pathlib import Path\n", 
"from typing import Dict, List\n", "from datasets import load_dataset\n", "import torch\n", "import pandas as pd\n", "\n", "os.environ['TOKENIZERS_PARALLELISM'] = 'true'" ] }, { "cell_type": "markdown", "metadata": { "id": "Y107u4JG7ICh" }, "source": [ "### **DATA PREP**" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PmgAAQFV7ICh", "outputId": "e6f4f065-4d0d-4102-d96a-c5ca791dd113" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "No config specified, defaulting to: emotion/split\n", "Found cached dataset emotion (/root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)\n", "\r 0% 0/3 [00:00,\n", "ignore_data_skip=False,\n", "include_inputs_for_metrics=False,\n", "jit_mode_eval=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=2e-05,\n", "length_column_name=length,\n", "load_best_model_at_end=False,\n", "local_rank=-1,\n", "log_level=passive,\n", "log_level_replica=passive,\n", "log_on_each_node=True,\n", "logging_dir=out/emotion/roberta/runs/Feb14_21-45-00_fc0011e45a00,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=500,\n", "logging_strategy=steps,\n", "lr_scheduler_type=linear,\n", "max_grad_norm=1.0,\n", "max_steps=-1,\n", "metric_for_best_model=None,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=1.0,\n", "optim=adamw_hf,\n", "output_dir=out/emotion/roberta,\n", "overwrite_output_dir=True,\n", "past_index=-1,\n", "per_device_eval_batch_size=24,\n", "per_device_train_batch_size=24,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "ray_scope=last,\n", "remove_unused_columns=True,\n", "report_to=['tensorboard'],\n", "resume_from_checkpoint=None,\n", "run_name=out/emotion/roberta,\n", "save_on_each_node=False,\n", "save_steps=500,\n", "save_strategy=steps,\n", "save_total_limit=None,\n", "seed=42,\n", "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torchdynamo=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_ipex=False,\n", "use_legacy_prediction_loop=False,\n", "use_mps_device=False,\n", "warmup_ratio=0.0,\n", "warmup_steps=0,\n", "weight_decay=0.0,\n", "xpu_backend=None,\n", ")\n", "INFO:__main__:load a local file for train: data/train.json\n", "INFO:__main__:load a local file for validation: data/valid.json\n", "INFO:__main__:load a local file for test: data/test.json\n", "WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n", "INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n", "INFO:datasets.builder:Generating dataset json (/content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", "Downloading and preparing dataset json/default to /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n", "Downloading data files: 100% 3/3 [00:00<00:00, 11491.24it/s]\n", "INFO:datasets.download.download_manager:Downloading took 0.0 min\n", "INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n", "Extracting data files: 100% 3/3 [00:00<00:00, 1882.54it/s]\n", "INFO:datasets.utils.info_utils:Unable to verify checksums.\n", 
"INFO:datasets.builder:Generating train split\n", "INFO:datasets.builder:Generating validation split\n", "INFO:datasets.builder:Generating test split\n", "INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n", "Dataset json downloaded and prepared to /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n", "100% 3/3 [00:00<00:00, 573.49it/s]\n", "Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 83.8kB/s]\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:45:01,575 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:45:01,576 >> Model config RobertaConfig {\n", " \"_name_or_path\": \"roberta-base\",\n", " \"architectures\": [\n", " \"RobertaForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"bos_token_id\": 0,\n", " \"classifier_dropout\": null,\n", " \"eos_token_id\": 2,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"id2label\": {\n", " \"0\": \"LABEL_0\",\n", " \"1\": \"LABEL_1\",\n", " \"2\": \"LABEL_2\",\n", " \"3\": \"LABEL_3\",\n", " \"4\": \"LABEL_4\",\n", " \"5\": \"LABEL_5\"\n", " },\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"label2id\": {\n", " \"LABEL_0\": 0,\n", " \"LABEL_1\": 1,\n", " \"LABEL_2\": 2,\n", " \"LABEL_3\": 3,\n", " \"LABEL_4\": 4,\n", " \"LABEL_5\": 5\n", " },\n", " \"layer_norm_eps\": 1e-05,\n", " \"max_position_embeddings\": 514,\n", " \"model_type\": \"roberta\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 1,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.23.1\",\n", " \"type_vocab_size\": 1,\n", " \"use_cache\": true,\n", " \"vocab_size\": 50265\n", "}\n", "\n", "[INFO|tokenization_auto.py:418] 2023-02-14 21:45:01,670 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:45:01,762 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:45:01,763 >> Model config RobertaConfig {\n", " \"_name_or_path\": \"roberta-base\",\n", " \"architectures\": [\n", " \"RobertaForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"bos_token_id\": 0,\n", " \"classifier_dropout\": null,\n", " \"eos_token_id\": 2,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-05,\n", " \"max_position_embeddings\": 514,\n", " \"model_type\": \"roberta\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 1,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.23.1\",\n", " \"type_vocab_size\": 1,\n", " \"use_cache\": true,\n", " \"vocab_size\": 50265\n", "}\n", "\n", "Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 9.36MB/s]\n", "Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.95MB/s]\n", "Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 
11.7MB/s]\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,975 >> loading file vocab.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file merges.txt from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file tokenizer.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file tokenizer_config.json from cache at None\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:45:02,976 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:45:02,977 >> Model config RobertaConfig {\n", " \"_name_or_path\": \"roberta-base\",\n", " \"architectures\": [\n", " \"RobertaForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"bos_token_id\": 0,\n", " \"classifier_dropout\": null,\n", " \"eos_token_id\": 2,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-05,\n", " \"max_position_embeddings\": 514,\n", " \"model_type\": \"roberta\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 1,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.23.1\",\n", " \"type_vocab_size\": 1,\n", " \"use_cache\": true,\n", " \"vocab_size\": 50265\n", "}\n", "\n", "INFO:__main__:Using implementation from class: AutoModelForSequenceClassification\n", "Downloading (…)\"pytorch_model.bin\";: 100% 501M/501M [00:04<00:00, 105MB/s]\n", "[INFO|modeling_utils.py:2156] 2023-02-14 21:45:08,072 >> loading weights file pytorch_model.bin from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n", "[WARNING|modeling_utils.py:2596] 2023-02-14 21:45:09,415 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight']\n", "- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "[WARNING|modeling_utils.py:2608] 2023-02-14 21:45:09,415 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "\n", "\n", "Frozen layers:\n", "[('roberta.encoder.layer.0.attention.self.query.weight', False), ('roberta.encoder.layer.0.attention.self.query.bias', False), ('roberta.encoder.layer.0.attention.self.key.weight', False), ('roberta.encoder.layer.0.attention.self.key.bias', False), ('roberta.encoder.layer.0.attention.self.value.weight', False), ('roberta.encoder.layer.0.attention.self.value.bias', False), ('roberta.encoder.layer.0.attention.output.dense.weight', False), ('roberta.encoder.layer.0.attention.output.dense.bias', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.0.intermediate.dense.weight', False), ('roberta.encoder.layer.0.intermediate.dense.bias', False), ('roberta.encoder.layer.0.output.dense.weight', False), ('roberta.encoder.layer.0.output.dense.bias', False), ('roberta.encoder.layer.0.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.attention.self.query.weight', False), ('roberta.encoder.layer.2.attention.self.query.bias', False), ('roberta.encoder.layer.2.attention.self.key.weight', False), ('roberta.encoder.layer.2.attention.self.key.bias', False), ('roberta.encoder.layer.2.attention.self.value.weight', False), ('roberta.encoder.layer.2.attention.self.value.bias', False), ('roberta.encoder.layer.2.attention.output.dense.weight', False), ('roberta.encoder.layer.2.attention.output.dense.bias', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.intermediate.dense.weight', False), ('roberta.encoder.layer.2.intermediate.dense.bias', False), ('roberta.encoder.layer.2.output.dense.weight', False), ('roberta.encoder.layer.2.output.dense.bias', False), ('roberta.encoder.layer.2.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.attention.self.query.weight', False), ('roberta.encoder.layer.4.attention.self.query.bias', False), ('roberta.encoder.layer.4.attention.self.key.weight', False), ('roberta.encoder.layer.4.attention.self.key.bias', False), ('roberta.encoder.layer.4.attention.self.value.weight', False), ('roberta.encoder.layer.4.attention.self.value.bias', False), ('roberta.encoder.layer.4.attention.output.dense.weight', False), ('roberta.encoder.layer.4.attention.output.dense.bias', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.intermediate.dense.weight', False), ('roberta.encoder.layer.4.intermediate.dense.bias', False), 
('roberta.encoder.layer.4.output.dense.weight', False), ('roberta.encoder.layer.4.output.dense.bias', False), ('roberta.encoder.layer.4.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.attention.self.query.weight', False), ('roberta.encoder.layer.6.attention.self.query.bias', False), ('roberta.encoder.layer.6.attention.self.key.weight', False), ('roberta.encoder.layer.6.attention.self.key.bias', False), ('roberta.encoder.layer.6.attention.self.value.weight', False), ('roberta.encoder.layer.6.attention.self.value.bias', False), ('roberta.encoder.layer.6.attention.output.dense.weight', False), ('roberta.encoder.layer.6.attention.output.dense.bias', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.intermediate.dense.weight', False), ('roberta.encoder.layer.6.intermediate.dense.bias', False), ('roberta.encoder.layer.6.output.dense.weight', False), ('roberta.encoder.layer.6.output.dense.bias', False), ('roberta.encoder.layer.6.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.attention.self.query.weight', False), ('roberta.encoder.layer.8.attention.self.query.bias', False), ('roberta.encoder.layer.8.attention.self.key.weight', False), ('roberta.encoder.layer.8.attention.self.key.bias', False), ('roberta.encoder.layer.8.attention.self.value.weight', False), ('roberta.encoder.layer.8.attention.self.value.bias', False), ('roberta.encoder.layer.8.attention.output.dense.weight', False), ('roberta.encoder.layer.8.attention.output.dense.bias', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.intermediate.dense.weight', False), ('roberta.encoder.layer.8.intermediate.dense.bias', False), ('roberta.encoder.layer.8.output.dense.weight', False), ('roberta.encoder.layer.8.output.dense.bias', False), ('roberta.encoder.layer.8.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.attention.self.query.weight', False), ('roberta.encoder.layer.10.attention.self.query.bias', False), ('roberta.encoder.layer.10.attention.self.key.weight', False), ('roberta.encoder.layer.10.attention.self.key.bias', False), ('roberta.encoder.layer.10.attention.self.value.weight', False), ('roberta.encoder.layer.10.attention.self.value.bias', False), ('roberta.encoder.layer.10.attention.output.dense.weight', False), ('roberta.encoder.layer.10.attention.output.dense.bias', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.intermediate.dense.weight', False), ('roberta.encoder.layer.10.intermediate.dense.bias', False), ('roberta.encoder.layer.10.output.dense.weight', False), ('roberta.encoder.layer.10.output.dense.bias', False), ('roberta.encoder.layer.10.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.output.LayerNorm.bias', False)] \n", "\n", "\n", "Running tokenizer on dataset: 0% 0/16 [00:00> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n", "/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "[INFO|trainer.py:1607] 2023-02-14 21:45:13,109 >> ***** Running training *****\n", "[INFO|trainer.py:1608] 2023-02-14 21:45:13,109 >> Num examples = 16000\n", "[INFO|trainer.py:1609] 2023-02-14 21:45:13,109 >> Num Epochs = 1\n", "[INFO|trainer.py:1610] 2023-02-14 21:45:13,109 >> Instantaneous batch size per device = 24\n", "[INFO|trainer.py:1611] 2023-02-14 21:45:13,109 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n", "[INFO|trainer.py:1612] 2023-02-14 21:45:13,109 >> Gradient Accumulation steps = 1\n", "[INFO|trainer.py:1613] 2023-02-14 21:45:13,109 >> Total optimization steps = 667\n", "{'loss': 0.8083, 'learning_rate': 5.0074962518740634e-06, 'epoch': 0.75}\n", " 75% 500/667 [00:58<00:19, 8.76it/s][INFO|trainer.py:2656] 2023-02-14 21:46:11,148 >> Saving model checkpoint to out/emotion/roberta/checkpoint-500\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:46:11,149 >> Configuration saved in out/emotion/roberta/checkpoint-500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:46:12,047 >> Model weights saved in out/emotion/roberta/checkpoint-500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:46:12,048 >> tokenizer config file saved in out/emotion/roberta/checkpoint-500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:46:12,048 >> Special tokens file saved in out/emotion/roberta/checkpoint-500/special_tokens_map.json\n", "100% 666/667 [01:19<00:00, 8.78it/s][INFO|trainer.py:1852] 2023-02-14 21:46:32,443 >> \n", "\n", "Training completed. Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "{'train_runtime': 79.3341, 'train_samples_per_second': 201.679, 'train_steps_per_second': 8.407, 'train_loss': 0.7161429089227359, 'epoch': 1.0}\n", "100% 667/667 [01:19<00:00, 8.41it/s]\n", "[INFO|trainer.py:2656] 2023-02-14 21:46:32,445 >> Saving model checkpoint to out/emotion/roberta\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:46:32,446 >> Configuration saved in out/emotion/roberta/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:46:33,422 >> Model weights saved in out/emotion/roberta/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:46:33,422 >> tokenizer config file saved in out/emotion/roberta/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:46:33,423 >> Special tokens file saved in out/emotion/roberta/special_tokens_map.json\n", "***** train metrics *****\n", " epoch = 1.0\n", " train_loss = 0.7161\n", " train_runtime = 0:01:19.33\n", " train_samples = 16000\n", " train_samples_per_second = 201.679\n", " train_steps_per_second = 8.407\n", "INFO:__main__:*** Evaluate ***\n", "[INFO|trainer.py:725] 2023-02-14 21:46:33,524 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:46:33,526 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:46:33,526 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:46:33,526 >> Batch size = 24\n", "100% 84/84 [00:03<00:00, 23.66it/s]\n", "***** eval metrics *****\n", " epoch = 1.0\n", " eval_accuracy = 0.889\n", " eval_loss = 0.3302\n", " eval_runtime = 0:00:03.59\n", " eval_samples = 2000\n", " eval_samples_per_second = 556.411\n", " eval_steps_per_second = 23.369\n", "INFO:__main__:*** Predict ***\n", "[INFO|trainer.py:725] 2023-02-14 21:46:37,124 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:46:37,125 >> ***** Running Prediction *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:46:37,125 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:46:37,125 >> Batch size = 24\n", "100% 84/84 [00:03<00:00, 23.68it/s]\n", "INFO:__main__:***** Predict results None *****\n", "[INFO|modelcard.py:444] 2023-02-14 21:46:40,840 >> Dropping the following result as it does not have all the necessary fields:\n", "{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.8889999985694885}]}\n" ] } ], "source": [ "!python run_glue.py \\\n", " --cache_dir roberta_training_cache \\\n", " --model_name_or_path roberta-base \\\n", " --train_file data/train.json \\\n", " --validation_file data/valid.json \\\n", " --test_file data/test.json \\\n", " --per_device_train_batch_size 24 \\\n", " --per_device_eval_batch_size 24 \\\n", " --do_train \\\n", " --do_eval \\\n", " --do_predict \\\n", " --max_seq_length 128 \\\n", " --learning_rate 2e-5 \\\n", " --num_train_epochs 1 \\\n", " --output_dir out/emotion/roberta \\\n", " --overwrite_output_dir" ] }, { "cell_type": "markdown", "source": [ "- full data\n", "- sequence length: 128\n", "- LeakyReLU instead of ReLU\n", "- every other layer frozen\n", "- custom head" ], "metadata": { "id": "b1iFFLFAf9PC" } }, { "cell_type": "code", "source": [ "!python run_glue.py \\\n", " --cache_dir roberta_custom_training_cache \\\n", " --model_name_or_path roberta-base \\\n", " --custom_model roberta_custom \\\n", " --train_file data/train.json \\\n", " --validation_file data/valid.json \\\n", " --test_file data/test.json \\\n", " --per_device_train_batch_size 24 \\\n", " --per_device_eval_batch_size 24 \\\n", " --do_train \\\n", " --do_eval \\\n", " --do_predict \\\n", " --max_seq_length 128 \\\n", " --learning_rate 2e-5 \\\n", " --num_train_epochs 1 \\\n", " --output_dir out/emotion/roberta_custom \\\n", " --overwrite_output_dir" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "WzRBwNKqkDAk", "outputId": "8d042117-3af6-4041-d1a5-d70024df24fb" }, "execution_count": 9, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2023-02-14 21:47:02.722049: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n", "To enable them in other operations, rebuild TensorFlow with the 
appropriate compiler flags.\n", "2023-02-14 21:47:02.876002: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2023-02-14 21:47:03.659342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n", "2023-02-14 21:47:03.659451: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n", "2023-02-14 21:47:03.659470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", "WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", "INFO:__main__:Training/evaluation parameters TrainingArguments(\n", "_n_gpu=1,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "auto_find_batch_size=False,\n", "bf16=False,\n", "bf16_full_eval=False,\n", "data_seed=None,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_bucket_cap_mb=None,\n", "ddp_find_unused_parameters=None,\n", "ddp_timeout=1800,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "do_eval=True,\n", "do_predict=True,\n", "do_train=True,\n", "eval_accumulation_steps=None,\n", "eval_delay=0,\n", "eval_steps=None,\n", "evaluation_strategy=no,\n", "fp16=False,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "fsdp=[],\n", "fsdp_min_num_params=0,\n", "fsdp_transformer_layer_cls_to_wrap=None,\n", "full_determinism=False,\n", "gradient_accumulation_steps=1,\n", "gradient_checkpointing=False,\n", "greater_is_better=None,\n", "group_by_length=False,\n", "half_precision_backend=auto,\n", "hub_model_id=None,\n", "hub_private_repo=False,\n", "hub_strategy=every_save,\n", "hub_token=,\n", "ignore_data_skip=False,\n", "include_inputs_for_metrics=False,\n", "jit_mode_eval=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=2e-05,\n", "length_column_name=length,\n", "load_best_model_at_end=False,\n", "local_rank=-1,\n", "log_level=passive,\n", "log_level_replica=passive,\n", "log_on_each_node=True,\n", "logging_dir=out/emotion/roberta_custom/runs/Feb14_21-47-05_fc0011e45a00,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=500,\n", "logging_strategy=steps,\n", "lr_scheduler_type=linear,\n", "max_grad_norm=1.0,\n", "max_steps=-1,\n", "metric_for_best_model=None,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=1.0,\n", "optim=adamw_hf,\n", "output_dir=out/emotion/roberta_custom,\n", "overwrite_output_dir=True,\n", "past_index=-1,\n", "per_device_eval_batch_size=24,\n", "per_device_train_batch_size=24,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "ray_scope=last,\n", 
"remove_unused_columns=True,\n", "report_to=['tensorboard'],\n", "resume_from_checkpoint=None,\n", "run_name=out/emotion/roberta_custom,\n", "save_on_each_node=False,\n", "save_steps=500,\n", "save_strategy=steps,\n", "save_total_limit=None,\n", "seed=42,\n", "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torchdynamo=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_ipex=False,\n", "use_legacy_prediction_loop=False,\n", "use_mps_device=False,\n", "warmup_ratio=0.0,\n", "warmup_steps=0,\n", "weight_decay=0.0,\n", "xpu_backend=None,\n", ")\n", "INFO:__main__:load a local file for train: data/train.json\n", "INFO:__main__:load a local file for validation: data/valid.json\n", "INFO:__main__:load a local file for test: data/test.json\n", "WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n", "INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n", "INFO:datasets.builder:Generating dataset json (/content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", "Downloading and preparing dataset json/default to /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n", "Downloading data files: 100% 3/3 [00:00<00:00, 14463.12it/s]\n", "INFO:datasets.download.download_manager:Downloading took 0.0 min\n", "INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n", "Extracting data files: 100% 3/3 [00:00<00:00, 2119.76it/s]\n", "INFO:datasets.utils.info_utils:Unable to verify checksums.\n", "INFO:datasets.builder:Generating train split\n", "INFO:datasets.builder:Generating validation split\n", "INFO:datasets.builder:Generating test split\n", "INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n", "Dataset json downloaded and prepared to /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. 
Subsequent calls will reuse this data.\n", "100% 3/3 [00:00<00:00, 657.14it/s]\n", "Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 88.4kB/s]\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:47:06,896 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:47:06,897 >> Model config RobertaConfig {\n", " \"_name_or_path\": \"roberta-base\",\n", " \"architectures\": [\n", " \"RobertaForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"bos_token_id\": 0,\n", " \"classifier_dropout\": null,\n", " \"eos_token_id\": 2,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"id2label\": {\n", " \"0\": \"LABEL_0\",\n", " \"1\": \"LABEL_1\",\n", " \"2\": \"LABEL_2\",\n", " \"3\": \"LABEL_3\",\n", " \"4\": \"LABEL_4\",\n", " \"5\": \"LABEL_5\"\n", " },\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"label2id\": {\n", " \"LABEL_0\": 0,\n", " \"LABEL_1\": 1,\n", " \"LABEL_2\": 2,\n", " \"LABEL_3\": 3,\n", " \"LABEL_4\": 4,\n", " \"LABEL_5\": 5\n", " },\n", " \"layer_norm_eps\": 1e-05,\n", " \"max_position_embeddings\": 514,\n", " \"model_type\": \"roberta\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 1,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.23.1\",\n", " \"type_vocab_size\": 1,\n", " \"use_cache\": true,\n", " \"vocab_size\": 50265\n", "}\n", "\n", "[INFO|tokenization_auto.py:418] 2023-02-14 21:47:06,989 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:47:07,079 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:47:07,080 >> Model config RobertaConfig {\n", " \"_name_or_path\": \"roberta-base\",\n", " \"architectures\": [\n", " \"RobertaForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"bos_token_id\": 0,\n", " \"classifier_dropout\": null,\n", " \"eos_token_id\": 2,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-05,\n", " \"max_position_embeddings\": 514,\n", " \"model_type\": \"roberta\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 1,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.23.1\",\n", " \"type_vocab_size\": 1,\n", " \"use_cache\": true,\n", " \"vocab_size\": 50265\n", "}\n", "\n", "Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 9.35MB/s]\n", "Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.91MB/s]\n", "Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 10.3MB/s]\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file vocab.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file merges.txt from cache at 
roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file tokenizer.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file tokenizer_config.json from cache at None\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:47:08,306 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:47:08,306 >> Model config RobertaConfig {\n", " \"_name_or_path\": \"roberta-base\",\n", " \"architectures\": [\n", " \"RobertaForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"bos_token_id\": 0,\n", " \"classifier_dropout\": null,\n", " \"eos_token_id\": 2,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-05,\n", " \"max_position_embeddings\": 514,\n", " \"model_type\": \"roberta\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 1,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.23.1\",\n", " \"type_vocab_size\": 1,\n", " \"use_cache\": true,\n", " \"vocab_size\": 50265\n", "}\n", "\n", "INFO:__main__:Using hidden states in model: False\n", "INFO:__main__:Using implementation from class: RobertaForSequenceClassificationCustomAlternative\n", "Downloading (…)\"pytorch_model.bin\";: 100% 501M/501M [00:04<00:00, 106MB/s]\n", "[INFO|modeling_utils.py:2156] 2023-02-14 21:47:13,300 >> loading weights file pytorch_model.bin from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n", "[WARNING|modeling_utils.py:2596] 2023-02-14 21:47:15,772 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassificationCustomAlternative: ['roberta.pooler.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']\n", "- This IS expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "[WARNING|modeling_utils.py:2608] 2023-02-14 21:47:15,772 >> Some weights of RobertaForSequenceClassificationCustomAlternative were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense_1_input.weight', 'classifier.dense_2.weight', 'classifier.out_proj.bias', 'classifier.dense_2.bias', 'classifier.dense_1_input.bias', 'classifier.dense_1_hidden.weight', 'classifier.dense_1_hidden.bias', 'classifier.out_proj.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "\n", "\n", "Frozen layers:\n", "[('roberta.encoder.layer.0.attention.self.query.weight', False), ('roberta.encoder.layer.0.attention.self.query.bias', False), ('roberta.encoder.layer.0.attention.self.key.weight', False), ('roberta.encoder.layer.0.attention.self.key.bias', False), ('roberta.encoder.layer.0.attention.self.value.weight', False), ('roberta.encoder.layer.0.attention.self.value.bias', False), ('roberta.encoder.layer.0.attention.output.dense.weight', False), ('roberta.encoder.layer.0.attention.output.dense.bias', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.0.intermediate.dense.weight', False), ('roberta.encoder.layer.0.intermediate.dense.bias', False), ('roberta.encoder.layer.0.output.dense.weight', False), ('roberta.encoder.layer.0.output.dense.bias', False), ('roberta.encoder.layer.0.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.attention.self.query.weight', False), ('roberta.encoder.layer.2.attention.self.query.bias', False), ('roberta.encoder.layer.2.attention.self.key.weight', False), ('roberta.encoder.layer.2.attention.self.key.bias', False), ('roberta.encoder.layer.2.attention.self.value.weight', False), ('roberta.encoder.layer.2.attention.self.value.bias', False), ('roberta.encoder.layer.2.attention.output.dense.weight', False), ('roberta.encoder.layer.2.attention.output.dense.bias', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.intermediate.dense.weight', False), ('roberta.encoder.layer.2.intermediate.dense.bias', False), ('roberta.encoder.layer.2.output.dense.weight', False), ('roberta.encoder.layer.2.output.dense.bias', False), ('roberta.encoder.layer.2.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.attention.self.query.weight', False), ('roberta.encoder.layer.4.attention.self.query.bias', False), ('roberta.encoder.layer.4.attention.self.key.weight', False), ('roberta.encoder.layer.4.attention.self.key.bias', False), ('roberta.encoder.layer.4.attention.self.value.weight', False), ('roberta.encoder.layer.4.attention.self.value.bias', False), ('roberta.encoder.layer.4.attention.output.dense.weight', False), ('roberta.encoder.layer.4.attention.output.dense.bias', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.weight', False), 
('roberta.encoder.layer.4.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.intermediate.dense.weight', False), ('roberta.encoder.layer.4.intermediate.dense.bias', False), ('roberta.encoder.layer.4.output.dense.weight', False), ('roberta.encoder.layer.4.output.dense.bias', False), ('roberta.encoder.layer.4.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.attention.self.query.weight', False), ('roberta.encoder.layer.6.attention.self.query.bias', False), ('roberta.encoder.layer.6.attention.self.key.weight', False), ('roberta.encoder.layer.6.attention.self.key.bias', False), ('roberta.encoder.layer.6.attention.self.value.weight', False), ('roberta.encoder.layer.6.attention.self.value.bias', False), ('roberta.encoder.layer.6.attention.output.dense.weight', False), ('roberta.encoder.layer.6.attention.output.dense.bias', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.intermediate.dense.weight', False), ('roberta.encoder.layer.6.intermediate.dense.bias', False), ('roberta.encoder.layer.6.output.dense.weight', False), ('roberta.encoder.layer.6.output.dense.bias', False), ('roberta.encoder.layer.6.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.attention.self.query.weight', False), ('roberta.encoder.layer.8.attention.self.query.bias', False), ('roberta.encoder.layer.8.attention.self.key.weight', False), ('roberta.encoder.layer.8.attention.self.key.bias', False), ('roberta.encoder.layer.8.attention.self.value.weight', False), ('roberta.encoder.layer.8.attention.self.value.bias', False), ('roberta.encoder.layer.8.attention.output.dense.weight', False), ('roberta.encoder.layer.8.attention.output.dense.bias', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.intermediate.dense.weight', False), ('roberta.encoder.layer.8.intermediate.dense.bias', False), ('roberta.encoder.layer.8.output.dense.weight', False), ('roberta.encoder.layer.8.output.dense.bias', False), ('roberta.encoder.layer.8.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.attention.self.query.weight', False), ('roberta.encoder.layer.10.attention.self.query.bias', False), ('roberta.encoder.layer.10.attention.self.key.weight', False), ('roberta.encoder.layer.10.attention.self.key.bias', False), ('roberta.encoder.layer.10.attention.self.value.weight', False), ('roberta.encoder.layer.10.attention.self.value.bias', False), ('roberta.encoder.layer.10.attention.output.dense.weight', False), ('roberta.encoder.layer.10.attention.output.dense.bias', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.intermediate.dense.weight', False), ('roberta.encoder.layer.10.intermediate.dense.bias', False), ('roberta.encoder.layer.10.output.dense.weight', False), ('roberta.encoder.layer.10.output.dense.bias', False), ('roberta.encoder.layer.10.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.output.LayerNorm.bias', False)] \n", "\n", "\n", "Running tokenizer on dataset: 0% 0/16 [00:00> The following columns in the training set don't have a corresponding argument in 
`RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n", "/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "[INFO|trainer.py:1607] 2023-02-14 21:47:19,649 >> ***** Running training *****\n", "[INFO|trainer.py:1608] 2023-02-14 21:47:19,649 >> Num examples = 16000\n", "[INFO|trainer.py:1609] 2023-02-14 21:47:19,649 >> Num Epochs = 1\n", "[INFO|trainer.py:1610] 2023-02-14 21:47:19,649 >> Instantaneous batch size per device = 24\n", "[INFO|trainer.py:1611] 2023-02-14 21:47:19,649 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n", "[INFO|trainer.py:1612] 2023-02-14 21:47:19,649 >> Gradient Accumulation steps = 1\n", "[INFO|trainer.py:1613] 2023-02-14 21:47:19,649 >> Total optimization steps = 667\n", "{'loss': 0.8955, 'learning_rate': 5.0074962518740634e-06, 'epoch': 0.75}\n", " 75% 500/667 [00:58<00:19, 8.75it/s][INFO|trainer.py:2656] 2023-02-14 21:48:17,996 >> Saving model checkpoint to out/emotion/roberta_custom/checkpoint-500\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:48:17,997 >> Configuration saved in out/emotion/roberta_custom/checkpoint-500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:48:19,015 >> Model weights saved in out/emotion/roberta_custom/checkpoint-500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:48:19,016 >> tokenizer config file saved in out/emotion/roberta_custom/checkpoint-500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:48:19,016 >> Special tokens file saved in out/emotion/roberta_custom/checkpoint-500/special_tokens_map.json\n", "100% 666/667 [01:20<00:00, 8.66it/s][INFO|trainer.py:1852] 2023-02-14 21:48:40,745 >> \n", "\n", "Training completed. Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "{'train_runtime': 81.0963, 'train_samples_per_second': 197.296, 'train_steps_per_second': 8.225, 'train_loss': 0.8004468377383573, 'epoch': 1.0}\n", "100% 667/667 [01:21<00:00, 8.23it/s]\n", "[INFO|trainer.py:2656] 2023-02-14 21:48:40,747 >> Saving model checkpoint to out/emotion/roberta_custom\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:48:40,748 >> Configuration saved in out/emotion/roberta_custom/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:48:41,796 >> Model weights saved in out/emotion/roberta_custom/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:48:41,797 >> tokenizer config file saved in out/emotion/roberta_custom/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:48:41,797 >> Special tokens file saved in out/emotion/roberta_custom/special_tokens_map.json\n", "***** train metrics *****\n", " epoch = 1.0\n", " train_loss = 0.8004\n", " train_runtime = 0:01:21.09\n", " train_samples = 16000\n", " train_samples_per_second = 197.296\n", " train_steps_per_second = 8.225\n", "INFO:__main__:*** Evaluate ***\n", "[INFO|trainer.py:725] 2023-02-14 21:48:41,898 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. 
If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:48:41,899 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:48:41,900 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:48:41,900 >> Batch size = 24\n", "100% 84/84 [00:03<00:00, 23.62it/s]\n", "***** eval metrics *****\n", " epoch = 1.0\n", " eval_accuracy = 0.867\n", " eval_loss = 0.39\n", " eval_runtime = 0:00:03.59\n", " eval_samples = 2000\n", " eval_samples_per_second = 555.583\n", " eval_steps_per_second = 23.334\n", "INFO:__main__:*** Predict ***\n", "[INFO|trainer.py:725] 2023-02-14 21:48:45,503 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:48:45,504 >> ***** Running Prediction *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:48:45,504 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:48:45,504 >> Batch size = 24\n", "100% 84/84 [00:03<00:00, 23.74it/s]\n", "INFO:__main__:***** Predict results None *****\n", "[INFO|modelcard.py:444] 2023-02-14 21:48:49,211 >> Dropping the following result as it does not have all the necessary fields:\n", "{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.8669999837875366}]}\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "HUdoRk5o7ICl" }, "source": [ "## **GPT2**" ] }, { "cell_type": "markdown", "metadata": { "id": "exFg0yb-7ICl" }, "source": [ "- full data\n", "- model `GPT2`\n", "- sequence length: 128\n", "- training epochs: 1" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "id": "DMHK35db7ICl", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "5a3776f5-7feb-480b-a433-a80ed81f3eb7" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2023-02-14 21:48:52.605236: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-02-14 21:48:52.757779: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2023-02-14 21:48:53.540701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n", "2023-02-14 21:48:53.540799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n", "2023-02-14 21:48:53.540819: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", "WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", "INFO:__main__:Training/evaluation parameters TrainingArguments(\n", "_n_gpu=1,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "auto_find_batch_size=False,\n", "bf16=False,\n", "bf16_full_eval=False,\n", "data_seed=None,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_bucket_cap_mb=None,\n", "ddp_find_unused_parameters=None,\n", "ddp_timeout=1800,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "do_eval=True,\n", "do_predict=True,\n", "do_train=True,\n", "eval_accumulation_steps=None,\n", "eval_delay=0,\n", "eval_steps=250,\n", "evaluation_strategy=steps,\n", "fp16=False,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "fsdp=[],\n", "fsdp_min_num_params=0,\n", "fsdp_transformer_layer_cls_to_wrap=None,\n", "full_determinism=False,\n", "gradient_accumulation_steps=1,\n", "gradient_checkpointing=False,\n", "greater_is_better=True,\n", "group_by_length=False,\n", "half_precision_backend=auto,\n", "hub_model_id=None,\n", "hub_private_repo=False,\n", "hub_strategy=every_save,\n", "hub_token=,\n", "ignore_data_skip=False,\n", "include_inputs_for_metrics=False,\n", "jit_mode_eval=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=2e-05,\n", "length_column_name=length,\n", "load_best_model_at_end=True,\n", "local_rank=-1,\n", "log_level=passive,\n", "log_level_replica=passive,\n", "log_on_each_node=True,\n", "logging_dir=out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=100,\n", "logging_strategy=steps,\n", "lr_scheduler_type=linear,\n", "max_grad_norm=1.0,\n", "max_steps=2500,\n", "metric_for_best_model=accuracy,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=1.0,\n", "optim=adamw_hf,\n", "output_dir=out/emotion/gpt2,\n", "overwrite_output_dir=True,\n", "past_index=-1,\n", "per_device_eval_batch_size=24,\n", "per_device_train_batch_size=24,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "ray_scope=last,\n", "remove_unused_columns=True,\n", "report_to=['tensorboard'],\n", "resume_from_checkpoint=None,\n", "run_name=out/emotion/gpt2,\n", "save_on_each_node=False,\n", "save_steps=500,\n", "save_strategy=steps,\n", "save_total_limit=5,\n", "seed=42,\n", "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torchdynamo=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_ipex=False,\n", "use_legacy_prediction_loop=False,\n", "use_mps_device=False,\n", "warmup_ratio=0.0,\n", "warmup_steps=0,\n", "weight_decay=0.0,\n", "xpu_backend=None,\n", ")\n", "INFO:__main__:load a local file for train: data/train.json\n", "INFO:__main__:load a local file for validation: data/valid.json\n", "INFO:__main__:load a local file for test: data/test.json\n", "WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n", "INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n", "INFO:datasets.builder:Generating dataset json 
(/content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", "Downloading and preparing dataset json/default to /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n", "Downloading data files: 100% 3/3 [00:00<00:00, 12169.16it/s]\n", "INFO:datasets.download.download_manager:Downloading took 0.0 min\n", "INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n", "Extracting data files: 100% 3/3 [00:00<00:00, 2183.40it/s]\n", "INFO:datasets.utils.info_utils:Unable to verify checksums.\n", "INFO:datasets.builder:Generating train split\n", "INFO:datasets.builder:Generating validation split\n", "INFO:datasets.builder:Generating test split\n", "INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n", "Dataset json downloaded and prepared to /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n", "100% 3/3 [00:00<00:00, 665.62it/s]\n", "Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 125kB/s]\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:48:57,052 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:48:57,053 >> Model config GPT2Config {\n", " \"_name_or_path\": \"gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"id2label\": {\n", " \"0\": \"LABEL_0\",\n", " \"1\": \"LABEL_1\",\n", " \"2\": \"LABEL_2\",\n", " \"3\": \"LABEL_3\",\n", " \"4\": \"LABEL_4\",\n", " \"5\": \"LABEL_5\"\n", " },\n", " \"initializer_range\": 0.02,\n", " \"label2id\": {\n", " \"LABEL_0\": 0,\n", " \"LABEL_1\": 1,\n", " \"LABEL_2\": 2,\n", " \"LABEL_3\": 3,\n", " \"LABEL_4\": 4,\n", " \"LABEL_5\": 5\n", " },\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 1024,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 1024,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50\n", " }\n", " },\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257\n", "}\n", "\n", "[INFO|tokenization_auto.py:418] 2023-02-14 21:48:57,145 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:48:57,236 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:48:57,237 >> Model config GPT2Config {\n", " \"_name_or_path\": \"gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " 
\"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"initializer_range\": 0.02,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 1024,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 1024,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50\n", " }\n", " },\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257\n", "}\n", "\n", "Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.20MB/s]\n", "Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 6.19MB/s]\n", "Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.7MB/s]\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file vocab.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file merges.txt from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file tokenizer.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file tokenizer_config.json from cache at None\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:48:58,447 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:48:58,448 >> Model config GPT2Config {\n", " \"_name_or_path\": \"gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"initializer_range\": 0.02,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 1024,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 1024,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50\n", " }\n", " },\n", " \"transformers_version\": \"4.23.1\",\n", 
" \"use_cache\": true,\n", " \"vocab_size\": 50257\n", "}\n", "\n", "INFO:__main__:Using implementation from class: AutoModelForSequenceClassification\n", "Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 108MB/s]\n", "[INFO|modeling_utils.py:2156] 2023-02-14 21:49:03,784 >> loading weights file pytorch_model.bin from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n", "[INFO|modeling_utils.py:2606] 2023-02-14 21:49:05,169 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassification.\n", "\n", "[WARNING|modeling_utils.py:2608] 2023-02-14 21:49:05,169 >> Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "[ERROR|tokenization_utils_base.py:1019] 2023-02-14 21:49:05,177 >> Using pad_token, but it is not set yet.\n", "INFO:__main__:Set PAD token to EOS: <|endoftext|>\n", "Running tokenizer on dataset: 0% 0/16 [00:00> max_steps is given, it will override any value given in num_train_epochs\n", "[INFO|trainer.py:725] 2023-02-14 21:49:08,712 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "[INFO|trainer.py:1607] 2023-02-14 21:49:08,718 >> ***** Running training *****\n", "[INFO|trainer.py:1608] 2023-02-14 21:49:08,718 >> Num examples = 16000\n", "[INFO|trainer.py:1609] 2023-02-14 21:49:08,718 >> Num Epochs = 4\n", "[INFO|trainer.py:1610] 2023-02-14 21:49:08,719 >> Instantaneous batch size per device = 24\n", "[INFO|trainer.py:1611] 2023-02-14 21:49:08,719 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n", "[INFO|trainer.py:1612] 2023-02-14 21:49:08,719 >> Gradient Accumulation steps = 1\n", "[INFO|trainer.py:1613] 2023-02-14 21:49:08,719 >> Total optimization steps = 2500\n", "{'loss': 2.3442, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.15}\n", "{'loss': 1.3126, 'learning_rate': 1.8400000000000003e-05, 'epoch': 0.3}\n", " 10% 250/2500 [00:37<05:31, 6.79it/s][INFO|trainer.py:725] 2023-02-14 21:49:46,426 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:49:46,428 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:49:46,428 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:49:46,428 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:50:27,314 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:50:27,314 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:50:27,314 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2/checkpoint-500\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:50:31,308 >> Configuration saved in out/emotion/gpt2/checkpoint-500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:50:32,356 >> Model weights saved in out/emotion/gpt2/checkpoint-500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:50:32,357 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:50:32,357 >> Special tokens file saved in out/emotion/gpt2/checkpoint-500/special_tokens_map.json\n", "{'loss': 0.3554, 'learning_rate': 1.5200000000000002e-05, 'epoch': 0.9}\n", "{'loss': 0.2871, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.05}\n", " 30% 750/2500 [02:02<04:19, 6.74it/s][INFO|trainer.py:725] 2023-02-14 21:51:11,104 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:51:11,106 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:51:11,106 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:51:11,106 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:51:51,749 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:51:51,750 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:51:51,750 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2/checkpoint-1000\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:51:55,717 >> Configuration saved in out/emotion/gpt2/checkpoint-1000/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:51:56,708 >> Model weights saved in out/emotion/gpt2/checkpoint-1000/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:51:56,709 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1000/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:51:56,709 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1000/special_tokens_map.json\n", "{'loss': 0.1906, 'learning_rate': 1.1200000000000001e-05, 'epoch': 1.65}\n", "{'loss': 0.1793, 'learning_rate': 1.04e-05, 'epoch': 1.8}\n", " 50% 1250/2500 [03:26<03:04, 6.76it/s][INFO|trainer.py:725] 2023-02-14 21:52:35,220 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:52:35,222 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:52:35,222 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:52:35,222 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:53:15,833 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:53:15,833 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:53:15,833 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2/checkpoint-1500\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:53:19,812 >> Configuration saved in out/emotion/gpt2/checkpoint-1500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:53:21,455 >> Model weights saved in out/emotion/gpt2/checkpoint-1500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:53:21,456 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:53:21,456 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1500/special_tokens_map.json\n", "{'loss': 0.157, 'learning_rate': 7.2000000000000005e-06, 'epoch': 2.4}\n", "{'loss': 0.141, 'learning_rate': 6.4000000000000006e-06, 'epoch': 2.55}\n", " 70% 1750/2500 [04:51<01:50, 6.80it/s][INFO|trainer.py:725] 2023-02-14 21:54:00,007 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:54:00,009 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:54:00,009 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:54:00,009 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:54:40,635 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:54:40,635 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:54:40,635 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2/checkpoint-2000\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:54:44,615 >> Configuration saved in out/emotion/gpt2/checkpoint-2000/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:54:46,838 >> Model weights saved in out/emotion/gpt2/checkpoint-2000/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:54:46,839 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2000/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:54:46,839 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2000/special_tokens_map.json\n", "{'loss': 0.1256, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.15}\n", "{'loss': 0.1246, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.3}\n", " 90% 2250/2500 [06:16<00:36, 6.76it/s][INFO|trainer.py:725] 2023-02-14 21:55:25,309 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:55:25,311 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:55:25,311 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:55:25,311 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:56:05,971 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:56:05,971 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:56:05,971 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2/checkpoint-2500\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:56:09,957 >> Configuration saved in out/emotion/gpt2/checkpoint-2500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:56:10,953 >> Model weights saved in out/emotion/gpt2/checkpoint-2500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:56:10,954 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:56:10,954 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2500/special_tokens_map.json\n", "[INFO|trainer.py:1852] 2023-02-14 21:56:12,777 >> \n", "\n", "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "[INFO|trainer.py:1946] 2023-02-14 21:56:12,778 >> Loading best model from out/emotion/gpt2/checkpoint-1500 (score: 0.9330000281333923).\n", "{'train_runtime': 424.4983, 'train_samples_per_second': 141.343, 'train_steps_per_second': 5.889, 'train_loss': 0.351297896194458, 'epoch': 3.75}\n", "100% 2500/2500 [07:04<00:00, 5.89it/s]\n", "[INFO|trainer.py:2656] 2023-02-14 21:56:13,218 >> Saving model checkpoint to out/emotion/gpt2\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:56:13,220 >> Configuration saved in out/emotion/gpt2/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:56:14,063 >> Model weights saved in out/emotion/gpt2/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:56:14,064 >> tokenizer config file saved in out/emotion/gpt2/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:56:14,064 >> Special tokens file saved in out/emotion/gpt2/special_tokens_map.json\n", "***** train metrics *****\n", " epoch = 3.75\n", " train_loss = 0.3513\n", " train_runtime = 0:07:04.49\n", " train_samples = 16000\n", " train_samples_per_second = 141.343\n", " train_steps_per_second = 5.889\n", "INFO:__main__:*** Evaluate ***\n", "[INFO|trainer.py:725] 2023-02-14 21:56:14,169 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:56:14,170 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:56:14,170 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:56:14,170 >> Batch size = 24\n", "100% 84/84 [00:03<00:00, 21.20it/s]\n", "***** eval metrics *****\n", " epoch = 3.75\n", " eval_accuracy = 0.933\n", " eval_loss = 0.1609\n", " eval_runtime = 0:00:04.02\n", " eval_samples = 2000\n", " eval_samples_per_second = 497.496\n", " eval_steps_per_second = 20.895\n", "INFO:__main__:*** Predict ***\n", "[INFO|trainer.py:725] 2023-02-14 21:56:18,194 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:56:18,195 >> ***** Running Prediction *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:56:18,195 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:56:18,195 >> Batch size = 24\n", "100% 84/84 [00:03<00:00, 21.40it/s]\n", "INFO:__main__:***** Predict results None *****\n", "[INFO|modelcard.py:444] 2023-02-14 21:56:22,304 >> Dropping the following result as it does not have all the necessary fields:\n", "{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9330000281333923}]}\n" ] } ], "source": [ "!python run_glue.py \\\n", " --cache_dir gtp_cache_training \\\n", " --model_name_or_path gpt2 \\\n", " --train_file data/train.json \\\n", " --validation_file data/valid.json \\\n", " --test_file data/test.json \\\n", " --per_device_train_batch_size 24 \\\n", " --per_device_eval_batch_size 24 \\\n", " --do_train \\\n", " --do_eval \\\n", " --do_predict \\\n", " --max_seq_length 128 \\\n", " --learning_rate 2e-5 \\\n", " --num_train_epochs 1 \\\n", " --output_dir out/emotion/gpt2 \\\n", " --overwrite_output_dir \\\n", " --eval_steps 250 \\\n", " --evaluation_strategy steps \\\n", " --metric_for_best_model accuracy \\\n", " --logging_steps 100 \\\n", " --save_total_limit 5 \\\n", " --max_steps 2500 \\\n", " --load_best_model_at_end True " ] }, { "cell_type": "markdown", "source": [ "- full dataset\n", "- custom head" ], "metadata": { "id": "zJeUGay5n1JW" } }, { "cell_type": "code", "source": [ "!python run_glue.py \\\n", " --cache_dir gtp_custom_cache_training \\\n", " --model_name_or_path gpt2 \\\n", " --custom_model gpt2_custom \\\n", " --train_file data/train.json \\\n", " --validation_file data/valid.json \\\n", " --test_file data/test.json \\\n", " --per_device_train_batch_size 24 \\\n", " --per_device_eval_batch_size 24 \\\n", " --do_train \\\n", " --do_eval \\\n", " --do_predict \\\n", " --max_seq_length 128 \\\n", " --learning_rate 2e-5 \\\n", " --num_train_epochs 1 \\\n", " --output_dir out/emotion/gpt2_custom \\\n", " --overwrite_output_dir \\\n", " --eval_steps 250 \\\n", " --evaluation_strategy steps \\\n", " --metric_for_best_model accuracy \\\n", " --logging_steps 100 \\\n", " --save_total_limit 5 \\\n", " --max_steps 2500 \\\n", " --load_best_model_at_end True " ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "LXRMDiD-n1nG", "outputId": "1383e6a3-b485-49a0-d111-05bea71acd23" }, "execution_count": 11, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2023-02-14 21:56:25.884599: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-02-14 21:56:26.040127: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2023-02-14 21:56:26.823479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n", "2023-02-14 21:56:26.823595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n", "2023-02-14 21:56:26.823615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", "WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", "INFO:__main__:Training/evaluation parameters TrainingArguments(\n", "_n_gpu=1,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "auto_find_batch_size=False,\n", "bf16=False,\n", "bf16_full_eval=False,\n", "data_seed=None,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_bucket_cap_mb=None,\n", "ddp_find_unused_parameters=None,\n", "ddp_timeout=1800,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "do_eval=True,\n", "do_predict=True,\n", "do_train=True,\n", "eval_accumulation_steps=None,\n", "eval_delay=0,\n", "eval_steps=250,\n", "evaluation_strategy=steps,\n", "fp16=False,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "fsdp=[],\n", "fsdp_min_num_params=0,\n", "fsdp_transformer_layer_cls_to_wrap=None,\n", "full_determinism=False,\n", "gradient_accumulation_steps=1,\n", "gradient_checkpointing=False,\n", "greater_is_better=True,\n", "group_by_length=False,\n", "half_precision_backend=auto,\n", "hub_model_id=None,\n", "hub_private_repo=False,\n", "hub_strategy=every_save,\n", "hub_token=,\n", "ignore_data_skip=False,\n", "include_inputs_for_metrics=False,\n", "jit_mode_eval=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=2e-05,\n", "length_column_name=length,\n", "load_best_model_at_end=True,\n", "local_rank=-1,\n", "log_level=passive,\n", "log_level_replica=passive,\n", "log_on_each_node=True,\n", "logging_dir=out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=100,\n", "logging_strategy=steps,\n", "lr_scheduler_type=linear,\n", "max_grad_norm=1.0,\n", "max_steps=2500,\n", "metric_for_best_model=accuracy,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=1.0,\n", "optim=adamw_hf,\n", "output_dir=out/emotion/gpt2_custom,\n", "overwrite_output_dir=True,\n", "past_index=-1,\n", "per_device_eval_batch_size=24,\n", "per_device_train_batch_size=24,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "ray_scope=last,\n", "remove_unused_columns=True,\n", "report_to=['tensorboard'],\n", "resume_from_checkpoint=None,\n", "run_name=out/emotion/gpt2_custom,\n", "save_on_each_node=False,\n", "save_steps=500,\n", "save_strategy=steps,\n", "save_total_limit=5,\n", "seed=42,\n", 
"sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "torchdynamo=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_ipex=False,\n", "use_legacy_prediction_loop=False,\n", "use_mps_device=False,\n", "warmup_ratio=0.0,\n", "warmup_steps=0,\n", "weight_decay=0.0,\n", "xpu_backend=None,\n", ")\n", "INFO:__main__:load a local file for train: data/train.json\n", "INFO:__main__:load a local file for validation: data/valid.json\n", "INFO:__main__:load a local file for test: data/test.json\n", "WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n", "INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n", "INFO:datasets.builder:Generating dataset json (/content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", "Downloading and preparing dataset json/default to /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n", "Downloading data files: 100% 3/3 [00:00<00:00, 14138.10it/s]\n", "INFO:datasets.download.download_manager:Downloading took 0.0 min\n", "INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n", "Extracting data files: 100% 3/3 [00:00<00:00, 2175.09it/s]\n", "INFO:datasets.utils.info_utils:Unable to verify checksums.\n", "INFO:datasets.builder:Generating train split\n", "INFO:datasets.builder:Generating validation split\n", "INFO:datasets.builder:Generating test split\n", "INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n", "Dataset json downloaded and prepared to /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. 
Subsequent calls will reuse this data.\n", "100% 3/3 [00:00<00:00, 672.49it/s]\n", "Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 123kB/s]\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:56:30,068 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:56:30,068 >> Model config GPT2Config {\n", " \"_name_or_path\": \"gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"id2label\": {\n", " \"0\": \"LABEL_0\",\n", " \"1\": \"LABEL_1\",\n", " \"2\": \"LABEL_2\",\n", " \"3\": \"LABEL_3\",\n", " \"4\": \"LABEL_4\",\n", " \"5\": \"LABEL_5\"\n", " },\n", " \"initializer_range\": 0.02,\n", " \"label2id\": {\n", " \"LABEL_0\": 0,\n", " \"LABEL_1\": 1,\n", " \"LABEL_2\": 2,\n", " \"LABEL_3\": 3,\n", " \"LABEL_4\": 4,\n", " \"LABEL_5\": 5\n", " },\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 1024,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 1024,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50\n", " }\n", " },\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257\n", "}\n", "\n", "[INFO|tokenization_auto.py:418] 2023-02-14 21:56:30,162 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:56:30,251 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:56:30,252 >> Model config GPT2Config {\n", " \"_name_or_path\": \"gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"initializer_range\": 0.02,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 1024,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 1024,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50\n", " }\n", " },\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257\n", "}\n", "\n", "Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 
9.18MB/s]\n", "Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.90MB/s]\n", "Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 14.3MB/s]\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file vocab.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file merges.txt from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file tokenizer.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file special_tokens_map.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file tokenizer_config.json from cache at None\n", "[INFO|configuration_utils.py:653] 2023-02-14 21:56:31,525 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 21:56:31,526 >> Model config GPT2Config {\n", " \"_name_or_path\": \"gpt2\",\n", " \"activation_function\": \"gelu_new\",\n", " \"architectures\": [\n", " \"GPT2LMHeadModel\"\n", " ],\n", " \"attn_pdrop\": 0.1,\n", " \"bos_token_id\": 50256,\n", " \"embd_pdrop\": 0.1,\n", " \"eos_token_id\": 50256,\n", " \"initializer_range\": 0.02,\n", " \"layer_norm_epsilon\": 1e-05,\n", " \"model_type\": \"gpt2\",\n", " \"n_ctx\": 1024,\n", " \"n_embd\": 768,\n", " \"n_head\": 12,\n", " \"n_inner\": null,\n", " \"n_layer\": 12,\n", " \"n_positions\": 1024,\n", " \"reorder_and_upcast_attn\": false,\n", " \"resid_pdrop\": 0.1,\n", " \"scale_attn_by_inverse_layer_idx\": false,\n", " \"scale_attn_weights\": true,\n", " \"summary_activation\": null,\n", " \"summary_first_dropout\": 0.1,\n", " \"summary_proj_to_labels\": true,\n", " \"summary_type\": \"cls_index\",\n", " \"summary_use_proj\": true,\n", " \"task_specific_params\": {\n", " \"text-generation\": {\n", " \"do_sample\": true,\n", " \"max_length\": 50\n", " }\n", " },\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 50257\n", "}\n", "\n", "INFO:__main__:Using hidden states in model: False\n", "INFO:__main__:Using implementation from class: GPT2ForSequenceClassificationCustom\n", "Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 108MB/s]\n", "[INFO|modeling_utils.py:2156] 2023-02-14 21:56:36,895 >> loading weights file pytorch_model.bin from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n", "[INFO|modeling_utils.py:2606] 2023-02-14 21:56:39,410 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n", "\n", "[WARNING|modeling_utils.py:2608] 2023-02-14 21:56:39,410 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_hidden.bias', 'score.dense_1_input.weight', 'score.dense_2.bias', 'score.dense_2.weight', 'score.out_proj.weight', 
'score.dense_1_hidden.weight', 'score.dense_1_input.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "[ERROR|tokenization_utils_base.py:1019] 2023-02-14 21:56:39,418 >> Using pad_token, but it is not set yet.\n", "INFO:__main__:Set PAD token to EOS: <|endoftext|>\n", "Running tokenizer on dataset: 0% 0/16 [00:00> max_steps is given, it will override any value given in num_train_epochs\n", "[INFO|trainer.py:725] 2023-02-14 21:56:42,941 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "[INFO|trainer.py:1607] 2023-02-14 21:56:42,947 >> ***** Running training *****\n", "[INFO|trainer.py:1608] 2023-02-14 21:56:42,947 >> Num examples = 16000\n", "[INFO|trainer.py:1609] 2023-02-14 21:56:42,947 >> Num Epochs = 4\n", "[INFO|trainer.py:1610] 2023-02-14 21:56:42,947 >> Instantaneous batch size per device = 24\n", "[INFO|trainer.py:1611] 2023-02-14 21:56:42,947 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n", "[INFO|trainer.py:1612] 2023-02-14 21:56:42,947 >> Gradient Accumulation steps = 1\n", "[INFO|trainer.py:1613] 2023-02-14 21:56:42,947 >> Total optimization steps = 2500\n", "{'loss': 1.6218, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.15}\n", "{'loss': 1.1593, 'learning_rate': 1.8400000000000003e-05, 'epoch': 0.3}\n", " 10% 250/2500 [00:39<05:43, 6.56it/s][INFO|trainer.py:725] 2023-02-14 21:57:22,025 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:57:22,027 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:57:22,027 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:57:22,027 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:58:04,248 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:58:04,248 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:58:04,248 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-500\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:58:08,381 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:58:09,983 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:58:09,984 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:58:09,984 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-500/special_tokens_map.json\n", "{'loss': 0.356, 'learning_rate': 1.5200000000000002e-05, 'epoch': 0.9}\n", "{'loss': 0.2714, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.05}\n", " 30% 750/2500 [02:07<04:25, 6.59it/s][INFO|trainer.py:725] 2023-02-14 21:58:49,972 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:58:49,973 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:58:49,974 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:58:49,974 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 21:59:32,170 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 21:59:32,170 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 21:59:32,171 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-1000\n", "[INFO|configuration_utils.py:447] 2023-02-14 21:59:36,294 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-1000/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 21:59:37,744 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-1000/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:59:37,744 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-1000/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:59:37,744 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-1000/special_tokens_map.json\n", "{'loss': 0.1836, 'learning_rate': 1.1200000000000001e-05, 'epoch': 1.65}\n", "{'loss': 0.1844, 'learning_rate': 1.04e-05, 'epoch': 1.8}\n", " 50% 1250/2500 [03:34<03:09, 6.59it/s][INFO|trainer.py:725] 2023-02-14 22:00:17,827 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 22:00:17,829 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:00:17,829 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:00:17,829 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 22:00:59,988 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:00:59,988 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:00:59,988 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-1500\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:01:04,120 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-1500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:01:05,576 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-1500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:01:05,576 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-1500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:01:05,576 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-1500/special_tokens_map.json\n", "{'loss': 0.1497, 'learning_rate': 7.2000000000000005e-06, 'epoch': 2.4}\n", "{'loss': 0.1496, 'learning_rate': 6.4000000000000006e-06, 'epoch': 2.55}\n", " 70% 1750/2500 [05:02<01:54, 6.54it/s][INFO|trainer.py:725] 2023-02-14 22:01:45,617 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 22:01:45,618 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:01:45,619 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:01:45,619 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 22:02:27,846 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:02:27,846 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:02:27,846 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-2000\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:02:31,976 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-2000/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:02:33,429 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-2000/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:02:33,430 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-2000/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:02:33,430 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-2000/special_tokens_map.json\n", "{'loss': 0.104, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.15}\n", "{'loss': 0.1206, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.3}\n", " 90% 2250/2500 [06:30<00:38, 6.55it/s][INFO|trainer.py:725] 2023-02-14 22:03:13,484 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 22:03:13,486 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:03:13,486 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:03:13,486 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 22:03:55,705 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:03:55,705 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:03:55,706 >> Batch size = 24\n", "\n", " 0% 0/84 [00:00> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-2500\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:03:59,823 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-2500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:04:00,568 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-2500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:04:00,569 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-2500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:04:00,569 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-2500/special_tokens_map.json\n", "[INFO|trainer.py:1852] 2023-02-14 22:04:02,582 >> \n", "\n", "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "[INFO|trainer.py:1946] 2023-02-14 22:04:02,582 >> Loading best model from out/emotion/gpt2_custom/checkpoint-2000 (score: 0.9365000128746033).\n", "{'train_runtime': 440.0758, 'train_samples_per_second': 136.34, 'train_steps_per_second': 5.681, 'train_loss': 0.32335229415893557, 'epoch': 3.75}\n", "100% 2500/2500 [07:20<00:00, 5.68it/s]\n", "[INFO|trainer.py:2656] 2023-02-14 22:04:03,025 >> Saving model checkpoint to out/emotion/gpt2_custom\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:04:03,026 >> Configuration saved in out/emotion/gpt2_custom/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:04:03,965 >> Model weights saved in out/emotion/gpt2_custom/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:04:03,966 >> tokenizer config file saved in out/emotion/gpt2_custom/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:04:03,966 >> Special tokens file saved in out/emotion/gpt2_custom/special_tokens_map.json\n", "***** train metrics *****\n", " epoch = 3.75\n", " train_loss = 0.3234\n", " train_runtime = 0:07:20.07\n", " train_samples = 16000\n", " train_samples_per_second = 136.34\n", " train_steps_per_second = 5.681\n", "INFO:__main__:*** Evaluate ***\n", "[INFO|trainer.py:725] 2023-02-14 22:04:04,068 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 22:04:04,069 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:04:04,069 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:04:04,070 >> Batch size = 24\n", "100% 84/84 [00:04<00:00, 20.35it/s]\n", "***** eval metrics *****\n", " epoch = 3.75\n", " eval_accuracy = 0.9365\n", " eval_loss = 0.1436\n", " eval_runtime = 0:00:04.18\n", " eval_samples = 2000\n", " eval_samples_per_second = 477.778\n", " eval_steps_per_second = 20.067\n", "INFO:__main__:*** Predict ***\n", "[INFO|trainer.py:725] 2023-02-14 22:04:08,259 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. 
If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n", "[INFO|trainer.py:2907] 2023-02-14 22:04:08,260 >> ***** Running Prediction *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:04:08,260 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:04:08,260 >> Batch size = 24\n", "100% 84/84 [00:04<00:00, 20.62it/s]\n", "INFO:__main__:***** Predict results None *****\n", "[INFO|modelcard.py:444] 2023-02-14 22:04:12,537 >> Dropping the following result as it does not have all the necessary fields:\n", "{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9365000128746033}]}\n" ] } ] }, { "cell_type": "markdown", "metadata": { "id": "VrHmnOaT7ICl" }, "source": [ "## **T5**" ] }, { "cell_type": "markdown", "metadata": { "id": "CmuDde477ICl" }, "source": [ "- full data\n", "- model `T5`\n", "- sequence length: 128\n", "- training epochs: 1 (overridden by `--max_steps 2500`)\n", "- first few layers frozen" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "id": "2ruXjeqj7ICl", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "4d73b407-08c3-4007-aa32-c8709dd696fa" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "2023-02-14 22:04:17.129470: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-02-14 22:04:17.281426: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2023-02-14 22:04:18.087509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n", "2023-02-14 22:04:18.087605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n", "2023-02-14 22:04:18.087624: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. 
If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n", "WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n", "INFO:__main__:Training/evaluation parameters Seq2SeqTrainingArguments(\n", "_n_gpu=1,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "auto_find_batch_size=False,\n", "bf16=False,\n", "bf16_full_eval=False,\n", "data_seed=None,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_bucket_cap_mb=None,\n", "ddp_find_unused_parameters=None,\n", "ddp_timeout=1800,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "do_eval=True,\n", "do_predict=True,\n", "do_train=True,\n", "eval_accumulation_steps=None,\n", "eval_delay=0,\n", "eval_steps=250,\n", "evaluation_strategy=steps,\n", "fp16=False,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "fsdp=[],\n", "fsdp_min_num_params=0,\n", "fsdp_transformer_layer_cls_to_wrap=None,\n", "full_determinism=False,\n", "generation_max_length=128,\n", "generation_num_beams=None,\n", "gradient_accumulation_steps=1,\n", "gradient_checkpointing=False,\n", "greater_is_better=True,\n", "group_by_length=False,\n", "half_precision_backend=auto,\n", "hub_model_id=None,\n", "hub_private_repo=False,\n", "hub_strategy=every_save,\n", "hub_token=,\n", "ignore_data_skip=False,\n", "include_inputs_for_metrics=False,\n", "jit_mode_eval=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=5e-05,\n", "length_column_name=length,\n", "load_best_model_at_end=True,\n", "local_rank=-1,\n", "log_level=passive,\n", "log_level_replica=passive,\n", "log_on_each_node=True,\n", "logging_dir=out/emotion/t5_v1_1/runs/Feb14_22-04-20_fc0011e45a00,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=100,\n", "logging_strategy=steps,\n", "lr_scheduler_type=linear,\n", "max_grad_norm=1.0,\n", "max_steps=2500,\n", "metric_for_best_model=accuracy,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=1.0,\n", "optim=adamw_hf,\n", "output_dir=out/emotion/t5_v1_1,\n", "overwrite_output_dir=True,\n", "past_index=-1,\n", "per_device_eval_batch_size=8,\n", "per_device_train_batch_size=8,\n", "predict_with_generate=True,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "ray_scope=last,\n", "remove_unused_columns=True,\n", "report_to=['tensorboard'],\n", "resume_from_checkpoint=None,\n", "run_name=out/emotion/t5_v1_1,\n", "save_on_each_node=False,\n", "save_steps=500,\n", "save_strategy=steps,\n", "save_total_limit=5,\n", "seed=42,\n", "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "sortish_sampler=False,\n", "tf32=None,\n", "torchdynamo=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_ipex=False,\n", "use_legacy_prediction_loop=False,\n", "use_mps_device=False,\n", "warmup_ratio=0.0,\n", "warmup_steps=0,\n", "weight_decay=0.0,\n", "xpu_backend=None,\n", ")\n", "WARNING:datasets.builder:Using custom data configuration default-a82ca4164dba097e\n", "INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n", "INFO:datasets.builder:Generating dataset json 
(/content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n", "Downloading and preparing dataset json/default to /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n", "Downloading data files: 100% 3/3 [00:00<00:00, 11848.32it/s]\n", "INFO:datasets.download.download_manager:Downloading took 0.0 min\n", "INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n", "Extracting data files: 100% 3/3 [00:00<00:00, 2097.85it/s]\n", "INFO:datasets.utils.info_utils:Unable to verify checksums.\n", "INFO:datasets.builder:Generating train split\n", "INFO:datasets.builder:Generating validation split\n", "INFO:datasets.builder:Generating test split\n", "INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n", "Dataset json downloaded and prepared to /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n", "100% 3/3 [00:00<00:00, 953.83it/s]\n", "Downloading (…)lve/main/config.json: 100% 537/537 [00:00<00:00, 97.0kB/s]\n", "[INFO|configuration_utils.py:653] 2023-02-14 22:04:20,972 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 22:04:20,975 >> Model config T5Config {\n", " \"_name_or_path\": \"google/t5-v1_1-small\",\n", " \"architectures\": [\n", " \"T5ForConditionalGeneration\"\n", " ],\n", " \"d_ff\": 1024,\n", " \"d_kv\": 64,\n", " \"d_model\": 512,\n", " \"decoder_start_token_id\": 0,\n", " \"dense_act_fn\": \"gelu_new\",\n", " \"dropout_rate\": 0.1,\n", " \"eos_token_id\": 1,\n", " \"feed_forward_proj\": \"gated-gelu\",\n", " \"initializer_factor\": 1.0,\n", " \"is_encoder_decoder\": true,\n", " \"is_gated_act\": true,\n", " \"layer_norm_epsilon\": 1e-06,\n", " \"model_type\": \"t5\",\n", " \"num_decoder_layers\": 8,\n", " \"num_heads\": 6,\n", " \"num_layers\": 8,\n", " \"output_past\": true,\n", " \"pad_token_id\": 0,\n", " \"relative_attention_max_distance\": 128,\n", " \"relative_attention_num_buckets\": 32,\n", " \"tie_word_embeddings\": false,\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 32128\n", "}\n", "\n", "Downloading (…)okenizer_config.json: 100% 1.86k/1.86k [00:00<00:00, 853kB/s]\n", "[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,160 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,160 >> Model config T5Config {\n", " \"_name_or_path\": \"google/t5-v1_1-small\",\n", " \"architectures\": [\n", " \"T5ForConditionalGeneration\"\n", " ],\n", " \"d_ff\": 1024,\n", " \"d_kv\": 64,\n", " \"d_model\": 512,\n", " \"decoder_start_token_id\": 0,\n", " \"dense_act_fn\": \"gelu_new\",\n", " \"dropout_rate\": 0.1,\n", " \"eos_token_id\": 1,\n", " \"feed_forward_proj\": \"gated-gelu\",\n", " \"initializer_factor\": 1.0,\n", " \"is_encoder_decoder\": true,\n", " \"is_gated_act\": true,\n", " \"layer_norm_epsilon\": 1e-06,\n", " \"model_type\": \"t5\",\n", " \"num_decoder_layers\": 8,\n", " \"num_heads\": 6,\n", " \"num_layers\": 8,\n", " \"output_past\": true,\n", " \"pad_token_id\": 0,\n", " 
\"relative_attention_max_distance\": 128,\n", " \"relative_attention_num_buckets\": 32,\n", " \"tie_word_embeddings\": false,\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 32128\n", "}\n", "\n", "Downloading (…)ve/main/spiece.model: 100% 792k/792k [00:00<00:00, 10.2MB/s]\n", "Downloading (…)cial_tokens_map.json: 100% 1.79k/1.79k [00:00<00:00, 705kB/s]\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file spiece.model from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file tokenizer.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file added_tokens.json from cache at None\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file special_tokens_map.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json\n", "[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file tokenizer_config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json\n", "[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,838 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,838 >> Model config T5Config {\n", " \"_name_or_path\": \"google/t5-v1_1-small\",\n", " \"architectures\": [\n", " \"T5ForConditionalGeneration\"\n", " ],\n", " \"d_ff\": 1024,\n", " \"d_kv\": 64,\n", " \"d_model\": 512,\n", " \"decoder_start_token_id\": 0,\n", " \"dense_act_fn\": \"gelu_new\",\n", " \"dropout_rate\": 0.1,\n", " \"eos_token_id\": 1,\n", " \"feed_forward_proj\": \"gated-gelu\",\n", " \"initializer_factor\": 1.0,\n", " \"is_encoder_decoder\": true,\n", " \"is_gated_act\": true,\n", " \"layer_norm_epsilon\": 1e-06,\n", " \"model_type\": \"t5\",\n", " \"num_decoder_layers\": 8,\n", " \"num_heads\": 6,\n", " \"num_layers\": 8,\n", " \"output_past\": true,\n", " \"pad_token_id\": 0,\n", " \"relative_attention_max_distance\": 128,\n", " \"relative_attention_num_buckets\": 32,\n", " \"tie_word_embeddings\": false,\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 32128\n", "}\n", "\n", "[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,888 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n", "[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,889 >> Model config T5Config {\n", " \"_name_or_path\": \"google/t5-v1_1-small\",\n", " \"architectures\": [\n", " \"T5ForConditionalGeneration\"\n", " ],\n", " \"d_ff\": 1024,\n", " \"d_kv\": 64,\n", " \"d_model\": 512,\n", " \"decoder_start_token_id\": 0,\n", " \"dense_act_fn\": \"gelu_new\",\n", " \"dropout_rate\": 0.1,\n", " \"eos_token_id\": 1,\n", " \"feed_forward_proj\": \"gated-gelu\",\n", " \"initializer_factor\": 1.0,\n", " \"is_encoder_decoder\": true,\n", " \"is_gated_act\": true,\n", " \"layer_norm_epsilon\": 1e-06,\n", " \"model_type\": \"t5\",\n", " \"num_decoder_layers\": 8,\n", " \"num_heads\": 6,\n", " \"num_layers\": 8,\n", " \"output_past\": true,\n", 
" \"pad_token_id\": 0,\n", " \"relative_attention_max_distance\": 128,\n", " \"relative_attention_num_buckets\": 32,\n", " \"tie_word_embeddings\": false,\n", " \"transformers_version\": \"4.23.1\",\n", " \"use_cache\": true,\n", " \"vocab_size\": 32128\n", "}\n", "\n", "Downloading (…)\"pytorch_model.bin\";: 100% 308M/308M [00:03<00:00, 84.8MB/s]\n", "[INFO|modeling_utils.py:2156] 2023-02-14 22:04:26,050 >> loading weights file pytorch_model.bin from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin\n", "[INFO|modeling_utils.py:2606] 2023-02-14 22:04:27,048 >> All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n", "\n", "[INFO|modeling_utils.py:2614] 2023-02-14 22:04:27,048 >> All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at google/t5-v1_1-small.\n", "If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n", "\n", "\n", "Frozen layers:\n", "[('encoder.block.1.layer.0.SelfAttention.q.weight', False), ('encoder.block.1.layer.0.SelfAttention.k.weight', False), ('encoder.block.1.layer.0.SelfAttention.v.weight', False), ('encoder.block.1.layer.0.SelfAttention.o.weight', False), ('encoder.block.1.layer.0.layer_norm.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.1.layer.1.layer_norm.weight', False), ('encoder.block.2.layer.0.SelfAttention.q.weight', False), ('encoder.block.2.layer.0.SelfAttention.k.weight', False), ('encoder.block.2.layer.0.SelfAttention.v.weight', False), ('encoder.block.2.layer.0.SelfAttention.o.weight', False), ('encoder.block.2.layer.0.layer_norm.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.2.layer.1.layer_norm.weight', False), ('encoder.block.3.layer.0.SelfAttention.q.weight', False), ('encoder.block.3.layer.0.SelfAttention.k.weight', False), ('encoder.block.3.layer.0.SelfAttention.v.weight', False), ('encoder.block.3.layer.0.SelfAttention.o.weight', False), ('encoder.block.3.layer.0.layer_norm.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.3.layer.1.layer_norm.weight', False), ('encoder.block.4.layer.0.SelfAttention.q.weight', False), ('encoder.block.4.layer.0.SelfAttention.k.weight', False), ('encoder.block.4.layer.0.SelfAttention.v.weight', False), ('encoder.block.4.layer.0.SelfAttention.o.weight', False), ('encoder.block.4.layer.0.layer_norm.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.4.layer.1.layer_norm.weight', False), ('encoder.block.5.layer.0.SelfAttention.q.weight', False), ('encoder.block.5.layer.0.SelfAttention.k.weight', False), ('encoder.block.5.layer.0.SelfAttention.v.weight', False), ('encoder.block.5.layer.0.SelfAttention.o.weight', False), ('encoder.block.5.layer.0.layer_norm.weight', False), 
('encoder.block.5.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.5.layer.1.layer_norm.weight', False), ('encoder.block.6.layer.0.SelfAttention.q.weight', False), ('encoder.block.6.layer.0.SelfAttention.k.weight', False), ('encoder.block.6.layer.0.SelfAttention.v.weight', False), ('encoder.block.6.layer.0.SelfAttention.o.weight', False), ('encoder.block.6.layer.0.layer_norm.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.6.layer.1.layer_norm.weight', False), ('encoder.block.7.layer.0.SelfAttention.q.weight', False), ('encoder.block.7.layer.0.SelfAttention.k.weight', False), ('encoder.block.7.layer.0.SelfAttention.v.weight', False), ('encoder.block.7.layer.0.SelfAttention.o.weight', False), ('encoder.block.7.layer.0.layer_norm.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.7.layer.1.layer_norm.weight', False)] \n", "\n", "\n", "INFO:__main__:Using translation prefix: \"emotion classification: \"\n", "Running tokenizer on train dataset: 0% 0/16 [00:00> max_steps is given, it will override any value given in num_train_epochs\n", "/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "[INFO|trainer.py:1607] 2023-02-14 22:04:30,911 >> ***** Running training *****\n", "[INFO|trainer.py:1608] 2023-02-14 22:04:30,911 >> Num examples = 16000\n", "[INFO|trainer.py:1609] 2023-02-14 22:04:30,911 >> Num Epochs = 2\n", "[INFO|trainer.py:1610] 2023-02-14 22:04:30,911 >> Instantaneous batch size per device = 8\n", "[INFO|trainer.py:1611] 2023-02-14 22:04:30,911 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n", "[INFO|trainer.py:1612] 2023-02-14 22:04:30,911 >> Gradient Accumulation steps = 1\n", "[INFO|trainer.py:1613] 2023-02-14 22:04:30,911 >> Total optimization steps = 2500\n", " 0% 0/2500 [00:00> You're using a T5TokenizerFast tokenizer. 
Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n", "{'loss': 21.5908, 'learning_rate': 4.8e-05, 'epoch': 0.05}\n", "{'loss': 14.8264, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.1}\n", " 10% 249/2500 [00:24<03:31, 10.64it/s][INFO|trainer.py:2907] 2023-02-14 22:04:55,366 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:04:55,366 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:04:55,366 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:05:35,963 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:05:35,963 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-500\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:05:53,120 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:05:53,749 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:05:53,750 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:05:53,750 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-500/special_tokens_map.json\n", "[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:05:53,788 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-500/spiece.model\n", "{'loss': 3.7795, 'learning_rate': 3.8e-05, 'epoch': 0.3}\n", "{'loss': 2.9169, 'learning_rate': 3.6e-05, 'epoch': 0.35}\n", " 30% 749/2500 [01:47<02:43, 10.71it/s][INFO|trainer.py:2907] 2023-02-14 22:06:18,135 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:06:18,136 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:06:18,136 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:06:58,636 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:06:58,636 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-1000\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:07:15,785 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-1000/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:07:16,414 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-1000/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:07:16,415 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-1000/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:07:16,416 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-1000/special_tokens_map.json\n", "[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:07:16,453 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-1000/spiece.model\n", "{'loss': 1.9003, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.55}\n", "{'loss': 1.7884, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.6}\n", " 50% 1249/2500 [03:09<01:59, 10.49it/s][INFO|trainer.py:2907] 2023-02-14 22:07:40,879 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:07:40,879 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:07:40,879 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 
2023-02-14 22:08:21,433 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:08:21,433 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-1500\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:08:38,814 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-1500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:08:39,285 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-1500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:08:39,286 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-1500/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:08:39,286 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-1500/special_tokens_map.json\n", "[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:08:39,322 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-1500/spiece.model\n", "{'loss': 1.4835, 'learning_rate': 1.8e-05, 'epoch': 0.8}\n", "{'loss': 1.449, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.85}\n", " 70% 1749/2500 [04:32<01:10, 10.61it/s][INFO|trainer.py:2907] 2023-02-14 22:09:03,363 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:09:03,363 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:09:03,363 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:09:43,863 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:09:43,863 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-2000\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:10:01,105 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-2000/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:10:01,585 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-2000/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:10:01,586 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-2000/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:10:01,586 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-2000/special_tokens_map.json\n", "[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:10:01,623 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-2000/spiece.model\n", "{'loss': 1.2708, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.05}\n", "{'loss': 1.3351, 'learning_rate': 6e-06, 'epoch': 1.1}\n", " 90% 2249/2500 [05:54<00:23, 10.80it/s][INFO|trainer.py:2907] 2023-02-14 22:10:25,736 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:10:25,736 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:10:25,736 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:11:06,283 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:11:06,283 >> Batch size = 8\n", "\n", " 0% 0/250 [00:00> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-2500\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:11:23,557 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-2500/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:11:24,033 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-2500/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:11:24,034 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-2500/tokenizer_config.json\n", 
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:11:24,034 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-2500/special_tokens_map.json\n", "[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:11:24,070 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-2500/spiece.model\n", "[INFO|trainer.py:1852] 2023-02-14 22:11:24,853 >> \n", "\n", "Training completed. Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "[INFO|trainer.py:1946] 2023-02-14 22:11:24,854 >> Loading best model from out/emotion/t5_v1_1/checkpoint-500 (score: 1.0).\n", "{'train_runtime': 414.2608, 'train_samples_per_second': 48.279, 'train_steps_per_second': 6.035, 'train_loss': 3.8232721221923827, 'epoch': 1.25}\n", "100% 2500/2500 [06:54<00:00, 6.03it/s]\n", "[INFO|trainer.py:2656] 2023-02-14 22:11:25,173 >> Saving model checkpoint to out/emotion/t5_v1_1\n", "[INFO|configuration_utils.py:447] 2023-02-14 22:11:25,174 >> Configuration saved in out/emotion/t5_v1_1/config.json\n", "[INFO|modeling_utils.py:1624] 2023-02-14 22:11:25,662 >> Model weights saved in out/emotion/t5_v1_1/pytorch_model.bin\n", "[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:11:25,663 >> tokenizer config file saved in out/emotion/t5_v1_1/tokenizer_config.json\n", "[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:11:25,663 >> Special tokens file saved in out/emotion/t5_v1_1/special_tokens_map.json\n", "[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:11:25,703 >> Copy vocab file to out/emotion/t5_v1_1/spiece.model\n", "***** train metrics *****\n", " epoch = 1.25\n", " train_loss = 3.8233\n", " train_runtime = 0:06:54.26\n", " train_samples = 16000\n", " train_samples_per_second = 48.279\n", " train_steps_per_second = 6.035\n", "INFO:__main__:*** Evaluate ***\n", "[INFO|trainer.py:2907] 2023-02-14 22:11:25,713 >> ***** Running Evaluation *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:11:25,713 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:11:25,713 >> Batch size = 8\n", "100% 250/250 [00:17<00:00, 14.50it/s]\n", "***** eval metrics *****\n", " epoch = 1.25\n", " eval_accuracy = 1.0\n", " eval_bleu = 0.0\n", " eval_gen_len = 2.0\n", " eval_loss = 2.1697\n", " eval_runtime = 0:00:17.31\n", " eval_samples = 2000\n", " eval_samples_per_second = 115.494\n", " eval_steps_per_second = 14.437\n", "INFO:__main__:*** Predict ***\n", "[INFO|trainer.py:2907] 2023-02-14 22:11:43,033 >> ***** Running Prediction *****\n", "[INFO|trainer.py:2909] 2023-02-14 22:11:43,033 >> Num examples = 2000\n", "[INFO|trainer.py:2912] 2023-02-14 22:11:43,034 >> Batch size = 8\n", "100% 250/250 [00:17<00:00, 14.58it/s]\n", "***** predict metrics *****\n", " predict_accuracy = 1.0\n", " predict_bleu = 0.0\n", " predict_gen_len = 2.0\n", " predict_loss = 2.1029\n", " predict_runtime = 0:00:17.21\n", " predict_samples = 2000\n", " predict_samples_per_second = 116.158\n", " predict_steps_per_second = 14.52\n", "[INFO|modelcard.py:444] 2023-02-14 22:12:00,417 >> Dropping the following result as it does not have all the necessary fields:\n", "{'task': {'name': 'Translation', 'type': 'translation'}, 'metrics': [{'name': 'Bleu', 'type': 'bleu', 'value': 0.0}, {'name': 'Accuracy', 'type': 'accuracy', 'value': 1.0}]}\n" ] } ], "source": [ "!python run_translation.py \\\n", " --cache_dir t5_cache_training \\\n", " --model_name_or_path \"google/t5-v1_1-small\" \\\n", " --train_file data/s2s-train.json \\\n", " --validation_file data/s2s-valid.json \\\n", " --test_file data/s2s-test.json \\\n", " --per_device_train_batch_size 
8 \\\n", " --per_device_eval_batch_size 8 \\\n", " --source_lang \"text\" \\\n", " --target_lang \"label\" \\\n", " --source_prefix \"emotion classification\" \\\n", " --max_source_length 256 \\\n", " --max_target_length 128 \\\n", " --generation_max_length 128 \\\n", " --do_train \\\n", " --do_eval \\\n", " --do_predict \\\n", " --predict_with_generate \\\n", " --num_train_epochs 1 \\\n", " --output_dir out/emotion/t5_v1_1 \\\n", " --overwrite_output_dir \\\n", " --eval_steps 250 \\\n", " --evaluation_strategy steps \\\n", " --metric_for_best_model accuracy \\\n", " --logging_steps 100 \\\n", " --save_total_limit 5 \\\n", " --max_steps 2500 \\\n", " --load_best_model_at_end True " ] }, { "cell_type": "markdown", "metadata": { "id": "XyC_7Ov07ICm" }, "source": [ "# **FLAN T5**" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "id": "nX6LOzsF7ICm" }, "outputs": [], "source": [ "from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM\n", "import json" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "EEuIugWA7ICm" }, "outputs": [], "source": [ "if torch.cuda.is_available():\n", " device = 0\n", "else:\n", " device = -1" ] }, { "cell_type": "code", "source": [ "def perform_shot_learning(pipeline_type, model_name, test_file):\n", " class_type = AutoModelForSeq2SeqLM\n", " model = class_type.from_pretrained(model_name, torch_dtype=torch.float32)\n", " tokenizer = AutoTokenizer.from_pretrained(model_name)\n", "\n", " our_pipeline = pipeline(pipeline_type, model=model, tokenizer=tokenizer, device=device)\n", "\n", " correct = 0\n", "\n", " labels = \"possible labels: sadness, joy, love, anger, fear, surprise\"\n", "\n", " with open(test_file) as f:\n", " f_lines = f.readlines()\n", " for line in f_lines:\n", " ex = json.loads(line)\n", " prompt = ex['text']\n", "\n", " tmp = labels + '\\n' + f'text: {prompt}' + '\\n' + 'label: '\n", " \n", " predict = our_pipeline(tmp, do_sample=False)[0]['generated_text']\n", "\n", " if predict == ex['label']:\n", " correct += 1\n", "\n", " print(f'Accuracy: {correct/len(f_lines)}')" ], "metadata": { "id": "AtDz85GKalzg" }, "execution_count": 15, "outputs": [] }, { "cell_type": "code", "source": [ "test_ds = 'data/s2s-test.json'" ], "metadata": { "id": "q9-4fzxpaoff" }, "execution_count": 16, "outputs": [] }, { "cell_type": "code", "source": [ "perform_shot_learning('text2text-generation', 'google/flan-t5-large', test_ds)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 219, "referenced_widgets": [ "18f03144f5194bd2a88064eaae1140f0", "36b8333766d44ee2aaa8da8ee75975d2", "f7a9b125cf1346468e428abd689ff800", "9b9e6161874f41c98d5c5e55d8d4fc86", "9925a6f17ba14eee96332f0ea1dc88e5", "dce64adfb8334591a8ce182918ecb4e3", "9efd8cd2208245aca3f369f0735e2ee1", "3d05704ffb0040c8b5bfb5c068c3329b", "9564dcdd10c64072bb09e70def311ff3", "f406c9b52a274068bd636554558497b2", "d97be50f8cc64f8680a6cce112863255", "7d6b88e56dad4dcbb0f1b1720f1ff118", "eabc78cbdeef40feb36cf90fdbcdfbc7", "6477d99dffbc4cf39e2c6998f71e37f7", "d63511a8852942309cabe53720939fcc", "3096b59f64eb48659a8eedea5a171be4", "acc58b06f3b54801b10ee872fab39e6e", "c2bd9c9ddab848529e52adfdc7634044", "8d7e8c29d7e247f1b55d329d40508526", "457b70adcab0464c9f990b13f433c635", "0858fe327ec549b488f6169de1d84654", "e18a505153c7491f8900142fb1189cd7", "945026e5e11448b39ab37fb2a0bd963c", "8c3aa97d58cb4f21b59af6253c952859", "848ff807a83c4a79a1b3d7d80c29499c", "a7b1f6722fcd4e90811041b24df0fe7b", "f815d05091814c39a467cd8f528db504", 
"915449ab41d848d39d801b4feb932a4f", "2937b015455647abb7a524f858a881d2", "c2b6cda9a8e94f7e97d7fb032b8e2bc5", "af885a022ad743098e5037e1c8dc760a", "088ec36aff7f415abfc4fd926fa0f902", "b1b99d863dc64208afc11416d4936c2c", "cb9e02be7ec44f6bb6b8771691c114e4", "f68a247bddf9484e9f7b1666802f4612", "d8d89ac972084304bff515a16e009452", "3495b00846ae49acbb0cf3e15edf361e", "60f6f23e78ce4ee2abf7389ab936c3ac", "9d428e02c4134510baf179ce9137d90c", "5298f4cd4e2e404ea66d70c62bcfe439", "cd9fdc3eb94a4d00b5af6115318dcf45", "d664c674a977456cad109347c0206d0e", "17e5dedc0aeb4a1da32113e51158fd74", "9b70ec9f110f4080a6a26fd12044fe94" ] }, "id": "7fWzF9PVatgL", "outputId": "6c37c046-a14c-4cab-e285-fa1ddfeb3241" }, "execution_count": 17, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)okenizer_config.json: 0%| | 0.00/2.54k [00:00