UG-final/all_models.ipynb

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "svk2qSrl7ICc"
},
"source": [
"# **Uczenie Głębokie - projekt**\n",
"W projekcie wykorzystano dataset [emotion](https://huggingface.co/datasets/emotion), zawierający wpisy nacechowane określonymi emocjami.\n",
"\n",
"<br>\n",
"\n",
"Labels:\n",
"- 0 - sadness\n",
"- 1 - joy\n",
"- 2 - love\n",
"- 3 - anger\n",
"- 4 - fear\n",
"- 5 - surprise"
]
},
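{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Illustrative sketch (not part of the original pipeline): load the `emotion` dataset with `datasets.load_dataset` and print the label names, which correspond to the id/label mapping above.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only: inspect the emotion dataset and its label mapping.\n",
"from datasets import load_dataset\n",
"\n",
"ds = load_dataset('emotion')  # splits: train / validation / test\n",
"label_names = ds['train'].features['label'].names\n",
"print(ds)\n",
"print(dict(enumerate(label_names)))  # e.g. {0: 'sadness', 1: 'joy', ...}"
]
},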
{
"cell_type": "markdown",
"metadata": {
"id": "wJ30OIAM7ICf"
},
"source": [
"### **REQUIREMENTS**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XkE5ENXV7ICf",
"outputId": "68ec24ee-8dcd-48b7-c0ce-3d18c1b9bcd6"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.8/dist-packages (4.23.1)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.2.1)\n",
"Requirement already satisfied: accelerate in /usr/local/lib/python3.8/dist-packages (0.16.0)\n",
"Requirement already satisfied: evaluate in /usr/local/lib/python3.8/dist-packages (0.4.0)\n",
"Requirement already satisfied: datasets in /usr/local/lib/python3.8/dist-packages (2.9.0)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (1.13.1)\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.8/dist-packages (0.1.97)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.8/dist-packages (0.14.1+cu116)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers) (4.64.1)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (1.21.6)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.12.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.13.2)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers) (2.25.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (23.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.8/dist-packages (from accelerate) (5.4.8)\n",
"Requirement already satisfied: dill in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.3.6)\n",
"Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.18.0)\n",
"Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.8/dist-packages (from evaluate) (2023.1.0)\n",
"Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from evaluate) (3.2.0)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from evaluate) (1.3.5)\n",
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.70.14)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
"Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /usr/local/lib/python3.8/dist-packages (from torch) (11.10.3.66)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /usr/local/lib/python3.8/dist-packages (from torch) (11.7.99)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch) (4.4.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /usr/local/lib/python3.8/dist-packages (from torch) (11.7.99)\n",
"Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /usr/local/lib/python3.8/dist-packages (from torch) (8.5.0.96)\n",
"Requirement already satisfied: wheel in /usr/local/lib/python3.8/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (0.38.4)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (57.4.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.8/dist-packages (from torchvision) (7.1.2)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2022.12.7)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (1.26.14)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (4.0.0)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2022.7.1)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->evaluate) (1.15.0)\n"
]
}
],
"source": [
"!pip3 install transformers scikit-learn accelerate evaluate datasets torch sentencepiece torchvision"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "MrV5G1gW7ICg"
},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"from pathlib import Path\n",
"from typing import Dict, List\n",
"from datasets import load_dataset\n",
"import torch\n",
"import pandas as pd\n",
"\n",
"os.environ['TOKENIZERS_PARALLELISM'] = 'true'"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Y107u4JG7ICh"
},
"source": [
"### **DATA PREP**"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PmgAAQFV7ICh",
"outputId": "e6f4f065-4d0d-4102-d96a-c5ca791dd113"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"No config specified, defaulting to: emotion/split\n",
"Found cached dataset emotion (/root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)\n",
"\r 0% 0/3 [00:00<?, ?it/s]\r100% 3/3 [00:00<00:00, 182.77it/s]\n",
"Saving into: data/train.json\n",
"Saving into: data/s2s-train.json\n",
"Saving into: data/valid.json\n",
"Saving into: data/s2s-valid.json\n",
"Saving into: data/test.json\n",
"Saving into: data/s2s-test.json\n"
]
}
],
"source": [
"!mkdir -p data\n",
"!python data_prep.py"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Fv0h2-MW7ICh",
"outputId": "ab7744f0-38e1-4415-f9e0-dbb182583e83"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\"label\": 0, \"text\": \"i didnt feel humiliated\"}\n",
"{\"label\": 0, \"text\": \"i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake\"}\n",
"{\"label\": 3, \"text\": \"im grabbing a minute to post i feel greedy wrong\"}\n",
"{\"label\": 2, \"text\": \"i am ever feeling nostalgic about the fireplace i will know that it is still on the property\"}\n",
"{\"label\": 3, \"text\": \"i am feeling grouchy\"}\n",
"{\"label\": 0, \"text\": \"ive been feeling a little burdened lately wasnt sure why that was\"}\n",
"{\"label\": 5, \"text\": \"ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny\"}\n",
"{\"label\": 4, \"text\": \"i feel as confused about life as a teenager or as jaded as a year old man\"}\n",
"{\"label\": 1, \"text\": \"i have been with petronas for years i feel that petronas has performed well and made a huge profit\"}\n",
"{\"label\": 2, \"text\": \"i feel romantic too\"}\n"
]
}
],
"source": [
"!head data/train.json"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6XHKYEod7ICi",
"outputId": "75b8480e-159a-4968-b0cc-3605680f7410"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\"label\": \"sadness\", \"text\": \"i didnt feel humiliated\"}\n",
"{\"label\": \"sadness\", \"text\": \"i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake\"}\n",
"{\"label\": \"anger\", \"text\": \"im grabbing a minute to post i feel greedy wrong\"}\n",
"{\"label\": \"love\", \"text\": \"i am ever feeling nostalgic about the fireplace i will know that it is still on the property\"}\n",
"{\"label\": \"anger\", \"text\": \"i am feeling grouchy\"}\n",
"{\"label\": \"sadness\", \"text\": \"ive been feeling a little burdened lately wasnt sure why that was\"}\n",
"{\"label\": \"surprise\", \"text\": \"ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny\"}\n",
"{\"label\": \"fear\", \"text\": \"i feel as confused about life as a teenager or as jaded as a year old man\"}\n",
"{\"label\": \"joy\", \"text\": \"i have been with petronas for years i feel that petronas has performed well and made a huge profit\"}\n",
"{\"label\": \"love\", \"text\": \"i feel romantic too\"}\n"
]
}
],
"source": [
"!head data/s2s-train.json"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BtTwN0yz7ICj",
"outputId": "5818cd0b-56eb-4a0f-cada-cf89d03f8d9c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" 2000 data/s2s-test.json\n",
" 16000 data/s2s-train.json\n",
" 2000 data/s2s-valid.json\n",
" 2000 data/test.json\n",
" 16000 data/train.json\n",
" 2000 data/valid.json\n",
" 40000 total\n"
]
}
],
"source": [
"!wc -l data/*"
]
},
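{
"cell_type": "markdown",
"metadata": {},
"source": [
"*The contents of `data_prep.py` are not shown in this notebook. The cell below is a hedged sketch of what it presumably does, based on the files and formats seen above: write each split as JSON Lines, once with integer labels (`data/*.json`) and once with label names for the seq2seq variant (`data/s2s-*.json`).*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch of data_prep.py (assumption based on the outputs above, not the actual script).\n",
"import json\n",
"from pathlib import Path\n",
"from datasets import load_dataset\n",
"\n",
"Path('data').mkdir(exist_ok=True)\n",
"ds = load_dataset('emotion')\n",
"label_names = ds['train'].features['label'].names\n",
"\n",
"for hf_split, name in [('train', 'train'), ('validation', 'valid'), ('test', 'test')]:\n",
"    with open(f'data/{name}.json', 'w') as plain, open(f'data/s2s-{name}.json', 'w') as s2s:\n",
"        for ex in ds[hf_split]:\n",
"            plain.write(json.dumps({'label': ex['label'], 'text': ex['text']}) + '\\n')\n",
"            s2s.write(json.dumps({'label': label_names[ex['label']], 'text': ex['text']}) + '\\n')"
]
},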
{
"cell_type": "markdown",
"metadata": {
"id": "jaX7Iyck7ICk"
},
"source": [
"## **ROBERTA**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bPBy_20B7ICk"
},
"source": [
"- full data\n",
"- model `roberta-base`\n",
"- sequnece length: 128\n",
"- training epoch: 1"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "C5TetFI_7ICk",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b83e8452-3eb2-4230-f19e-55fe8a830f4e"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:44:57.299984: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:44:57.452345: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:44:58.236913: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:44:58.237017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:44:58.237058: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=None,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/roberta/runs/Feb14_21-45-00_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
"metric_for_best_model=None,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/roberta,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/roberta,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 11491.24it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 1882.54it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 573.49it/s]\n",
"Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 83.8kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:01,575 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:01,576 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:45:01,670 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:01,762 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:01,763 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 9.36MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.95MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.7MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,975 >> loading file vocab.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file merges.txt from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file tokenizer.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:02,976 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:02,977 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"INFO:__main__:Using implementation from class: AutoModelForSequenceClassification\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 501M/501M [00:04<00:00, 105MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:45:08,072 >> loading weights file pytorch_model.bin from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n",
"[WARNING|modeling_utils.py:2596] 2023-02-14 21:45:09,415 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight']\n",
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:45:09,415 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('roberta.encoder.layer.0.attention.self.query.weight', False), ('roberta.encoder.layer.0.attention.self.query.bias', False), ('roberta.encoder.layer.0.attention.self.key.weight', False), ('roberta.encoder.layer.0.attention.self.key.bias', False), ('roberta.encoder.layer.0.attention.self.value.weight', False), ('roberta.encoder.layer.0.attention.self.value.bias', False), ('roberta.encoder.layer.0.attention.output.dense.weight', False), ('roberta.encoder.layer.0.attention.output.dense.bias', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.0.intermediate.dense.weight', False), ('roberta.encoder.layer.0.intermediate.dense.bias', False), ('roberta.encoder.layer.0.output.dense.weight', False), ('roberta.encoder.layer.0.output.dense.bias', False), ('roberta.encoder.layer.0.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.attention.self.query.weight', False), ('roberta.encoder.layer.2.attention.self.query.bias', False), ('roberta.encoder.layer.2.attention.self.key.weight', False), ('roberta.encoder.layer.2.attention.self.key.bias', False), ('roberta.encoder.layer.2.attention.self.value.weight', False), ('roberta.encoder.layer.2.attention.self.value.bias', False), ('roberta.encoder.layer.2.attention.output.dense.weight', False), ('roberta.encoder.layer.2.attention.output.dense.bias', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.intermediate.dense.weight', False), ('roberta.encoder.layer.2.intermediate.dense.bias', False), ('roberta.encoder.layer.2.output.dense.weight', False), ('roberta.encoder.layer.2.output.dense.bias', False), ('roberta.encoder.layer.2.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.attention.self.query.weight', False), ('roberta.encoder.layer.4.attention.self.query.bias', False), ('roberta.encoder.layer.4.attention.self.key.weight', False), ('roberta.encoder.layer.4.attention.self.key.bias', False), ('roberta.encoder.layer.4.attention.self.value.weight', False), ('roberta.encoder.layer.4.attention.self.value.bias', False), ('roberta.encoder.layer.4.attention.output.dense.weight', False), ('roberta.encoder.layer.4.attention.output.dense.bias', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.intermediate.dense.weight', False), ('roberta.encoder.layer.4.intermediate.dense.bias', False), ('roberta.encoder.layer.4.output.dense.weight', False), ('roberta.encoder.layer.4.output.dense.bias', False), ('roberta.encoder.layer.4.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.attention.self.query.weight', False), ('roberta.encoder.layer.6.attention.self.query.bias', False), ('roberta.encoder.layer.6.attention.self.key.weight', False), ('roberta.encoder.layer.6.attention.self.key.bias', False), ('roberta.encoder.layer.6.attention.self.value.weight', False), ('roberta.encoder.layer.6.attention.self.value.bias', False), ('roberta.encoder.layer.6.attention.output.dense.weight', False), ('roberta.encoder.layer.6.attention.output.dense.bias', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.weight', False), 
('roberta.encoder.layer.6.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.intermediate.dense.weight', False), ('roberta.encoder.layer.6.intermediate.dense.bias', False), ('roberta.encoder.layer.6.output.dense.weight', False), ('roberta.encoder.layer.6.output.dense.bias', False), ('roberta.encoder.layer.6.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.attention.self.query.weight', False), ('roberta.encoder.layer.8.attention.self.query.bias', False), ('roberta.encoder.layer.8.attention.self.key.weight', False), ('roberta.encoder.layer.8.attention.self.key.bias', False), ('roberta.encoder.layer.8.attention.self.value.weight', False), ('roberta.encoder.layer.8.attention.self.value.bias', False), ('roberta.encoder.layer.8.attention.output.dense.weight', False), ('roberta.encoder.layer.8.attention.output.dense.bias', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.intermediate.dense.weight', False), ('roberta.encoder.layer.8.intermediate.dense.bias', False), ('roberta.encoder.layer.8.output.dense.weight', False), ('roberta.encoder.layer.8.output.dense.bias', False), ('roberta.encoder.layer.8.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.attention.self.query.weight', False), ('roberta.encoder.layer.10.attention.self.query.bias', False), ('roberta.encoder.layer.10.attention.self.key.weight', False), ('roberta.encoder.layer.10.attention.self.key.bias', False), ('roberta.encoder.layer.10.attention.self.value.weight', False), ('roberta.encoder.layer.10.attention.self.value.bias', False), ('roberta.encoder.layer.10.attention.output.dense.weight', False), ('roberta.encoder.layer.10.attention.output.dense.bias', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.intermediate.dense.weight', False), ('roberta.encoder.layer.10.intermediate.dense.bias', False), ('roberta.encoder.layer.10.output.dense.weight', False), ('roberta.encoder.layer.10.output.dense.bias', False), ('roberta.encoder.layer.10.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.output.LayerNorm.bias', False)] \n",
"\n",
"\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 20.66ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.58ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.81ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [0, 118, 109, 465, 92, 964, 939, 475, 164, 7, 860, 1823, 543, 7, 146, 106, 1095, 8, 114, 939, 2845, 14, 939, 218, 326, 236, 7, 619, 2581, 456, 8, 95, 3068, 66, 5, 94, 76, 9, 334, 15, 127, 308, 939, 475, 164, 7, 33, 7, 860, 1823, 543, 45, 7, 575, 99, 82, 206, 9, 162, 145, 10, 784, 9604, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [0, 118, 553, 106, 7, 1962, 162, 11, 2351, 10, 232, 147, 70, 76, 793, 1972, 115, 1733, 62, 2157, 7917, 8, 2247, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [0, 118, 619, 77, 47, 32, 10, 10837, 621, 47, 5696, 97, 10837, 82, 88, 110, 301, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:725] 2023-02-14 21:45:13,102 >> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:45:13,109 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:45:13,109 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:45:13,109 >> Num Epochs = 1\n",
"[INFO|trainer.py:1610] 2023-02-14 21:45:13,109 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:45:13,109 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:45:13,109 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:45:13,109 >> Total optimization steps = 667\n",
"{'loss': 0.8083, 'learning_rate': 5.0074962518740634e-06, 'epoch': 0.75}\n",
" 75% 500/667 [00:58<00:19, 8.76it/s][INFO|trainer.py:2656] 2023-02-14 21:46:11,148 >> Saving model checkpoint to out/emotion/roberta/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:46:11,149 >> Configuration saved in out/emotion/roberta/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:46:12,047 >> Model weights saved in out/emotion/roberta/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:46:12,048 >> tokenizer config file saved in out/emotion/roberta/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:46:12,048 >> Special tokens file saved in out/emotion/roberta/checkpoint-500/special_tokens_map.json\n",
"100% 666/667 [01:19<00:00, 8.78it/s][INFO|trainer.py:1852] 2023-02-14 21:46:32,443 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'train_runtime': 79.3341, 'train_samples_per_second': 201.679, 'train_steps_per_second': 8.407, 'train_loss': 0.7161429089227359, 'epoch': 1.0}\n",
"100% 667/667 [01:19<00:00, 8.41it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:46:32,445 >> Saving model checkpoint to out/emotion/roberta\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:46:32,446 >> Configuration saved in out/emotion/roberta/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:46:33,422 >> Model weights saved in out/emotion/roberta/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:46:33,422 >> tokenizer config file saved in out/emotion/roberta/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:46:33,423 >> Special tokens file saved in out/emotion/roberta/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 1.0\n",
" train_loss = 0.7161\n",
" train_runtime = 0:01:19.33\n",
" train_samples = 16000\n",
" train_samples_per_second = 201.679\n",
" train_steps_per_second = 8.407\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:46:33,524 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:46:33,526 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:46:33,526 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:46:33,526 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.66it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.0\n",
" eval_accuracy = 0.889\n",
" eval_loss = 0.3302\n",
" eval_runtime = 0:00:03.59\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 556.411\n",
" eval_steps_per_second = 23.369\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:46:37,124 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:46:37,125 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:46:37,125 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:46:37,125 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.68it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:46:40,840 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.8889999985694885}]}\n"
]
}
],
"source": [
"!python run_glue.py \\\n",
" --cache_dir roberta_training_cache \\\n",
" --model_name_or_path roberta-base \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/roberta \\\n",
" --overwrite_output_dir"
]
},
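{
"cell_type": "markdown",
"metadata": {},
"source": [
"*The `Frozen layers` listing in the log above shows that every other encoder layer (0, 2, 4, 6, 8, 10) has `requires_grad=False`. The freezing itself is done inside `run_glue.py` (not shown here); the cell below is a minimal sketch of how such freezing can be implemented.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch (assumption): freeze every other RoBERTa encoder layer, as in the log above.\n",
"from transformers import AutoModelForSequenceClassification\n",
"\n",
"model = AutoModelForSequenceClassification.from_pretrained('roberta-base', num_labels=6)\n",
"for name, param in model.named_parameters():\n",
"    if name.startswith('roberta.encoder.layer.'):\n",
"        layer_idx = int(name.split('.')[3])  # 'roberta.encoder.layer.<idx>....'\n",
"        if layer_idx % 2 == 0:  # layers 0, 2, 4, 6, 8, 10\n",
"            param.requires_grad = False\n",
"\n",
"frozen = [n for n, p in model.named_parameters() if not p.requires_grad]\n",
"print(len(frozen), 'frozen parameter tensors')"
]
},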
{
"cell_type": "markdown",
"source": [
"- full data\n",
"- sequence length: 128\n",
"- leakyRelu instad of relu\n",
"- every other layer frozen\n",
"- custom head"
],
"metadata": {
"id": "b1iFFLFAf9PC"
}
},
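{
"cell_type": "markdown",
"metadata": {},
"source": [
"*`roberta_custom` is implemented in the project's own code, which is not shown in this notebook. Based on the newly initialized weight names reported in the log below (`classifier.dense_1_input`, `classifier.dense_1_hidden`, `classifier.dense_2`, `classifier.out_proj`), the custom head looks roughly like the sketch in the next cell; the exact wiring, hidden sizes and dropout are assumptions.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch.nn as nn\n",
"\n",
"# Hedged sketch (not the project's actual roberta_custom class): a RoBERTa-style\n",
"# classification head that uses LeakyReLU as its activation. Layer names mirror the\n",
"# log below; their wiring, sizes and dropout rate are assumptions.\n",
"class RobertaCustomHeadSketch(nn.Module):\n",
"    def __init__(self, hidden_size=768, num_labels=6, dropout=0.1):\n",
"        super().__init__()\n",
"        self.dense_1_input = nn.Linear(hidden_size, hidden_size)\n",
"        self.dense_1_hidden = nn.Linear(hidden_size, hidden_size)\n",
"        self.dense_2 = nn.Linear(hidden_size, hidden_size)\n",
"        self.out_proj = nn.Linear(hidden_size, num_labels)\n",
"        self.activation = nn.LeakyReLU()\n",
"        self.dropout = nn.Dropout(dropout)\n",
"\n",
"    def forward(self, features):\n",
"        x = features[:, 0, :]  # embedding of the <s> token (RoBERTa's [CLS] equivalent)\n",
"        x = self.activation(self.dense_1_input(self.dropout(x)))\n",
"        x = self.activation(self.dense_1_hidden(self.dropout(x)))\n",
"        x = self.activation(self.dense_2(self.dropout(x)))\n",
"        return self.out_proj(self.dropout(x))"
]
},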
{
"cell_type": "code",
"source": [
"!python run_glue.py \\\n",
" --cache_dir roberta_custom_training_cache \\\n",
" --model_name_or_path roberta-base \\\n",
" --custom_model roberta_custom \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/roberta_custom \\\n",
" --overwrite_output_dir"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WzRBwNKqkDAk",
"outputId": "8d042117-3af6-4041-d1a5-d70024df24fb"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:47:02.722049: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:47:02.876002: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:47:03.659342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:47:03.659451: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:47:03.659470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=None,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/roberta_custom/runs/Feb14_21-47-05_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
"metric_for_best_model=None,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/roberta_custom,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/roberta_custom,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 14463.12it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2119.76it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 657.14it/s]\n",
"Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 88.4kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:06,896 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:06,897 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:47:06,989 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:07,079 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:07,080 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 9.35MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.91MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 10.3MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file vocab.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file merges.txt from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file tokenizer.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:08,306 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:08,306 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: False\n",
"INFO:__main__:Using implementation from class: RobertaForSequenceClassificationCustomAlternative\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 501M/501M [00:04<00:00, 106MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:47:13,300 >> loading weights file pytorch_model.bin from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n",
"[WARNING|modeling_utils.py:2596] 2023-02-14 21:47:15,772 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassificationCustomAlternative: ['roberta.pooler.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']\n",
"- This IS expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:47:15,772 >> Some weights of RobertaForSequenceClassificationCustomAlternative were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense_1_input.weight', 'classifier.dense_2.weight', 'classifier.out_proj.bias', 'classifier.dense_2.bias', 'classifier.dense_1_input.bias', 'classifier.dense_1_hidden.weight', 'classifier.dense_1_hidden.bias', 'classifier.out_proj.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('roberta.encoder.layer.0.attention.self.query.weight', False), ('roberta.encoder.layer.0.attention.self.query.bias', False), ('roberta.encoder.layer.0.attention.self.key.weight', False), ('roberta.encoder.layer.0.attention.self.key.bias', False), ('roberta.encoder.layer.0.attention.self.value.weight', False), ('roberta.encoder.layer.0.attention.self.value.bias', False), ('roberta.encoder.layer.0.attention.output.dense.weight', False), ('roberta.encoder.layer.0.attention.output.dense.bias', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.0.intermediate.dense.weight', False), ('roberta.encoder.layer.0.intermediate.dense.bias', False), ('roberta.encoder.layer.0.output.dense.weight', False), ('roberta.encoder.layer.0.output.dense.bias', False), ('roberta.encoder.layer.0.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.attention.self.query.weight', False), ('roberta.encoder.layer.2.attention.self.query.bias', False), ('roberta.encoder.layer.2.attention.self.key.weight', False), ('roberta.encoder.layer.2.attention.self.key.bias', False), ('roberta.encoder.layer.2.attention.self.value.weight', False), ('roberta.encoder.layer.2.attention.self.value.bias', False), ('roberta.encoder.layer.2.attention.output.dense.weight', False), ('roberta.encoder.layer.2.attention.output.dense.bias', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.intermediate.dense.weight', False), ('roberta.encoder.layer.2.intermediate.dense.bias', False), ('roberta.encoder.layer.2.output.dense.weight', False), ('roberta.encoder.layer.2.output.dense.bias', False), ('roberta.encoder.layer.2.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.attention.self.query.weight', False), ('roberta.encoder.layer.4.attention.self.query.bias', False), ('roberta.encoder.layer.4.attention.self.key.weight', False), ('roberta.encoder.layer.4.attention.self.key.bias', False), ('roberta.encoder.layer.4.attention.self.value.weight', False), ('roberta.encoder.layer.4.attention.self.value.bias', False), ('roberta.encoder.layer.4.attention.output.dense.weight', False), ('roberta.encoder.layer.4.attention.output.dense.bias', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.intermediate.dense.weight', False), ('roberta.encoder.layer.4.intermediate.dense.bias', False), ('roberta.encoder.layer.4.output.dense.weight', False), ('roberta.encoder.layer.4.output.dense.bias', False), ('roberta.encoder.layer.4.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.attention.self.query.weight', False), ('roberta.encoder.layer.6.attention.self.query.bias', False), ('roberta.encoder.layer.6.attention.self.key.weight', False), ('roberta.encoder.layer.6.attention.self.key.bias', False), ('roberta.encoder.layer.6.attention.self.value.weight', False), ('roberta.encoder.layer.6.attention.self.value.bias', False), ('roberta.encoder.layer.6.attention.output.dense.weight', False), ('roberta.encoder.layer.6.attention.output.dense.bias', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.weight', False), 
('roberta.encoder.layer.6.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.intermediate.dense.weight', False), ('roberta.encoder.layer.6.intermediate.dense.bias', False), ('roberta.encoder.layer.6.output.dense.weight', False), ('roberta.encoder.layer.6.output.dense.bias', False), ('roberta.encoder.layer.6.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.attention.self.query.weight', False), ('roberta.encoder.layer.8.attention.self.query.bias', False), ('roberta.encoder.layer.8.attention.self.key.weight', False), ('roberta.encoder.layer.8.attention.self.key.bias', False), ('roberta.encoder.layer.8.attention.self.value.weight', False), ('roberta.encoder.layer.8.attention.self.value.bias', False), ('roberta.encoder.layer.8.attention.output.dense.weight', False), ('roberta.encoder.layer.8.attention.output.dense.bias', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.intermediate.dense.weight', False), ('roberta.encoder.layer.8.intermediate.dense.bias', False), ('roberta.encoder.layer.8.output.dense.weight', False), ('roberta.encoder.layer.8.output.dense.bias', False), ('roberta.encoder.layer.8.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.attention.self.query.weight', False), ('roberta.encoder.layer.10.attention.self.query.bias', False), ('roberta.encoder.layer.10.attention.self.key.weight', False), ('roberta.encoder.layer.10.attention.self.key.bias', False), ('roberta.encoder.layer.10.attention.self.value.weight', False), ('roberta.encoder.layer.10.attention.self.value.bias', False), ('roberta.encoder.layer.10.attention.output.dense.weight', False), ('roberta.encoder.layer.10.attention.output.dense.bias', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.intermediate.dense.weight', False), ('roberta.encoder.layer.10.intermediate.dense.bias', False), ('roberta.encoder.layer.10.output.dense.weight', False), ('roberta.encoder.layer.10.output.dense.bias', False), ('roberta.encoder.layer.10.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.output.LayerNorm.bias', False)] \n",
"\n",
"\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:01<00:00, 15.42ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.47ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 19.76ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [0, 118, 109, 465, 92, 964, 939, 475, 164, 7, 860, 1823, 543, 7, 146, 106, 1095, 8, 114, 939, 2845, 14, 939, 218, 326, 236, 7, 619, 2581, 456, 8, 95, 3068, 66, 5, 94, 76, 9, 334, 15, 127, 308, 939, 475, 164, 7, 33, 7, 860, 1823, 543, 45, 7, 575, 99, 82, 206, 9, 162, 145, 10, 784, 9604, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [0, 118, 553, 106, 7, 1962, 162, 11, 2351, 10, 232, 147, 70, 76, 793, 1972, 115, 1733, 62, 2157, 7917, 8, 2247, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [0, 118, 619, 77, 47, 32, 10, 10837, 621, 47, 5696, 97, 10837, 82, 88, 110, 301, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:725] 2023-02-14 21:47:19,642 >> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:47:19,649 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:47:19,649 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:47:19,649 >> Num Epochs = 1\n",
"[INFO|trainer.py:1610] 2023-02-14 21:47:19,649 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:47:19,649 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:47:19,649 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:47:19,649 >> Total optimization steps = 667\n",
"{'loss': 0.8955, 'learning_rate': 5.0074962518740634e-06, 'epoch': 0.75}\n",
" 75% 500/667 [00:58<00:19, 8.75it/s][INFO|trainer.py:2656] 2023-02-14 21:48:17,996 >> Saving model checkpoint to out/emotion/roberta_custom/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:48:17,997 >> Configuration saved in out/emotion/roberta_custom/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:48:19,015 >> Model weights saved in out/emotion/roberta_custom/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:48:19,016 >> tokenizer config file saved in out/emotion/roberta_custom/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:48:19,016 >> Special tokens file saved in out/emotion/roberta_custom/checkpoint-500/special_tokens_map.json\n",
"100% 666/667 [01:20<00:00, 8.66it/s][INFO|trainer.py:1852] 2023-02-14 21:48:40,745 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'train_runtime': 81.0963, 'train_samples_per_second': 197.296, 'train_steps_per_second': 8.225, 'train_loss': 0.8004468377383573, 'epoch': 1.0}\n",
"100% 667/667 [01:21<00:00, 8.23it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:48:40,747 >> Saving model checkpoint to out/emotion/roberta_custom\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:48:40,748 >> Configuration saved in out/emotion/roberta_custom/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:48:41,796 >> Model weights saved in out/emotion/roberta_custom/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:48:41,797 >> tokenizer config file saved in out/emotion/roberta_custom/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:48:41,797 >> Special tokens file saved in out/emotion/roberta_custom/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 1.0\n",
" train_loss = 0.8004\n",
" train_runtime = 0:01:21.09\n",
" train_samples = 16000\n",
" train_samples_per_second = 197.296\n",
" train_steps_per_second = 8.225\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:48:41,898 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:48:41,899 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:48:41,900 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:48:41,900 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.62it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.0\n",
" eval_accuracy = 0.867\n",
" eval_loss = 0.39\n",
" eval_runtime = 0:00:03.59\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 555.583\n",
" eval_steps_per_second = 23.334\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:48:45,503 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:48:45,504 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:48:45,504 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:48:45,504 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.74it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:48:49,211 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.8669999837875366}]}\n"
]
}
]
},
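{
"cell_type": "markdown",
"metadata": {},
"source": [
"The \"Frozen layers\" listing above shows that every second encoder layer (0, 2, 4, 6, 8, 10) of the custom RoBERTa model had `requires_grad=False` during fine-tuning. The exact implementation lives in the project's training script and its `RobertaForSequenceClassificationCustomAlternative` class; the snippet below is only a minimal, hypothetical sketch of how such selective freezing is typically done, assuming the stock `roberta-base` checkpoint and `AutoModelForSequenceClassification` instead of the custom class.\n",
"```python\n",
"from transformers import AutoModelForSequenceClassification\n",
"\n",
"# Hypothetical sketch (not the project's actual training code):\n",
"# freeze every second RoBERTa encoder layer, as in the log above.\n",
"model = AutoModelForSequenceClassification.from_pretrained(\"roberta-base\", num_labels=6)\n",
"\n",
"frozen_layers = range(0, 12, 2)  # layers 0, 2, 4, 6, 8, 10\n",
"for name, param in model.named_parameters():\n",
"    if any(f\"encoder.layer.{i}.\" in name for i in frozen_layers):\n",
"        param.requires_grad = False\n",
"\n",
"# Inspect what was frozen, mirroring the \"Frozen layers\" printout.\n",
"print([(n, p.requires_grad) for n, p in model.named_parameters() if not p.requires_grad])\n",
"```"
]
},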
{
"cell_type": "markdown",
"metadata": {
"id": "HUdoRk5o7ICl"
},
"source": [
"## **GPT2**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "exFg0yb-7ICl"
},
"source": [
"- full data\n",
"- model `GPT2`\n",
"- sequnece length: 128\n",
"- training epoch: 1"
]
},
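{
"cell_type": "markdown",
"metadata": {},
"source": [
"The output that follows comes from the project's own training setup; the snippet here is only a minimal, hypothetical sketch of an equivalent `GPT2ForSequenceClassification` fine-tune built directly on the `Trainer` API, using the hyperparameters reported in that log (batch size 24, learning rate 2e-05, `max_steps=2500`, `eval_steps=250`, sequence length 128, PAD token reused from EOS). The file paths `data/train.json` and `data/valid.json` mirror those printed in the log; everything else is an assumption for illustration.\n",
"```python\n",
"import numpy as np\n",
"from datasets import load_dataset\n",
"from transformers import (AutoTokenizer, GPT2ForSequenceClassification,\n",
"                          Trainer, TrainingArguments)\n",
"\n",
"raw = load_dataset(\"json\", data_files={\"train\": \"data/train.json\",\n",
"                                       \"validation\": \"data/valid.json\"})\n",
"\n",
"tok = AutoTokenizer.from_pretrained(\"gpt2\")\n",
"tok.pad_token = tok.eos_token  # GPT-2 ships without a PAD token, reuse EOS\n",
"\n",
"def tokenize(batch):\n",
"    return tok(batch[\"text\"], padding=\"max_length\", truncation=True, max_length=128)\n",
"\n",
"ds = raw.map(tokenize, batched=True)\n",
"\n",
"model = GPT2ForSequenceClassification.from_pretrained(\"gpt2\", num_labels=6)\n",
"model.config.pad_token_id = tok.pad_token_id  # classification head needs to find the last non-pad token\n",
"\n",
"def compute_metrics(p):\n",
"    return {\"accuracy\": (np.argmax(p.predictions, axis=1) == p.label_ids).mean()}\n",
"\n",
"args = TrainingArguments(\n",
"    output_dir=\"out/emotion/gpt2\",\n",
"    per_device_train_batch_size=24,\n",
"    per_device_eval_batch_size=24,\n",
"    learning_rate=2e-05,\n",
"    max_steps=2500,\n",
"    evaluation_strategy=\"steps\",\n",
"    eval_steps=250,\n",
"    save_steps=500,\n",
"    load_best_model_at_end=True,\n",
"    metric_for_best_model=\"accuracy\",\n",
")\n",
"\n",
"trainer = Trainer(model=model, args=args, train_dataset=ds[\"train\"],\n",
"                  eval_dataset=ds[\"validation\"], compute_metrics=compute_metrics)\n",
"trainer.train()\n",
"```"
]
},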
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "DMHK35db7ICl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5a3776f5-7feb-480b-a433-a80ed81f3eb7"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:48:52.605236: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:48:52.757779: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:48:53.540701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:48:53.540799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:48:53.540819: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/gpt2,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/gpt2,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 12169.16it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2183.40it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 665.62it/s]\n",
"Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 125kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:57,052 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:57,053 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:48:57,145 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:57,236 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:57,237 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.20MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 6.19MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.7MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file vocab.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file merges.txt from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file tokenizer.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:58,447 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:58,448 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using implementation from class: AutoModelForSequenceClassification\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 108MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:49:03,784 >> loading weights file pytorch_model.bin from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 21:49:05,169 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassification.\n",
"\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:49:05,169 >> Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"[ERROR|tokenization_utils_base.py:1019] 2023-02-14 21:49:05,177 >> Using pad_token, but it is not set yet.\n",
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 20.23ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.04ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.92ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:503] 2023-02-14 21:49:08,712 >> max_steps is given, it will override any value given in num_train_epochs\n",
"[INFO|trainer.py:725] 2023-02-14 21:49:08,712 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:49:08,718 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:49:08,718 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:49:08,718 >> Num Epochs = 4\n",
"[INFO|trainer.py:1610] 2023-02-14 21:49:08,719 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:49:08,719 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:49:08,719 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:49:08,719 >> Total optimization steps = 2500\n",
"{'loss': 2.3442, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.15}\n",
"{'loss': 1.3126, 'learning_rate': 1.8400000000000003e-05, 'epoch': 0.3}\n",
" 10% 250/2500 [00:37<05:31, 6.79it/s][INFO|trainer.py:725] 2023-02-14 21:49:46,426 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:49:46,428 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:49:46,428 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:49:46,428 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 4% 3/84 [00:00<00:02, 29.40it/s]\u001b[A\n",
" 7% 6/84 [00:00<00:03, 23.74it/s]\u001b[A\n",
" 11% 9/84 [00:00<00:03, 22.40it/s]\u001b[A\n",
" 14% 12/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 18% 15/84 [00:00<00:03, 21.50it/s]\u001b[A\n",
" 21% 18/84 [00:00<00:03, 21.30it/s]\u001b[A\n",
" 25% 21/84 [00:00<00:02, 21.20it/s]\u001b[A\n",
" 29% 24/84 [00:01<00:02, 20.97it/s]\u001b[A\n",
" 32% 27/84 [00:01<00:02, 20.93it/s]\u001b[A\n",
" 36% 30/84 [00:01<00:02, 20.97it/s]\u001b[A\n",
" 39% 33/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 43% 36/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 46% 39/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 50% 42/84 [00:01<00:01, 21.03it/s]\u001b[A\n",
" 54% 45/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 57% 48/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 61% 51/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 64% 54/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 68% 57/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 71% 60/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 75% 63/84 [00:02<00:00, 21.00it/s]\u001b[A\n",
" 79% 66/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 82% 69/84 [00:03<00:00, 20.94it/s]\u001b[A\n",
" 86% 72/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 89% 75/84 [00:03<00:00, 20.98it/s]\u001b[A\n",
" 93% 78/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 96% 81/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
"100% 84/84 [00:03<00:00, 22.24it/s]\u001b[A\n",
"{'eval_loss': 0.7983964085578918, 'eval_accuracy': 0.7465000152587891, 'eval_runtime': 3.9877, 'eval_samples_per_second': 501.548, 'eval_steps_per_second': 21.065, 'epoch': 0.37}\n",
"\n",
" 10% 250/2500 [00:41<05:31, 6.79it/s]\n",
"{'loss': 0.7216, 'learning_rate': 1.76e-05, 'epoch': 0.45}\n",
"{'loss': 0.5032, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.6}\n",
"{'loss': 0.3904, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.75}\n",
" 20% 500/2500 [01:18<04:56, 6.74it/s][INFO|trainer.py:725] 2023-02-14 21:50:27,312 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:50:27,314 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:50:27,314 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:50:27,314 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.77it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.71it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.34it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.72it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.09it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.95it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.92it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.87it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.91it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.95it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.91it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.82it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.87it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.90it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
"{'eval_loss': 0.29131895303726196, 'eval_accuracy': 0.9035000205039978, 'eval_runtime': 3.9922, 'eval_samples_per_second': 500.974, 'eval_steps_per_second': 21.041, 'epoch': 0.75}\n",
"\n",
" 20% 500/2500 [01:22<04:56, 6.74it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:50:31,307 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:50:31,308 >> Configuration saved in out/emotion/gpt2/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:50:32,356 >> Model weights saved in out/emotion/gpt2/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:50:32,357 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:50:32,357 >> Special tokens file saved in out/emotion/gpt2/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.3554, 'learning_rate': 1.5200000000000002e-05, 'epoch': 0.9}\n",
"{'loss': 0.2871, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.05}\n",
" 30% 750/2500 [02:02<04:19, 6.74it/s][INFO|trainer.py:725] 2023-02-14 21:51:11,104 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:51:11,106 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:51:11,106 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:51:11,106 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.92it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.90it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.57it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.98it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.63it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.31it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.17it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.02it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.04it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.07it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.07it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.04it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
"{'eval_loss': 0.2168988287448883, 'eval_accuracy': 0.9235000014305115, 'eval_runtime': 3.9688, 'eval_samples_per_second': 503.925, 'eval_steps_per_second': 21.165, 'epoch': 1.12}\n",
"\n",
" 30% 750/2500 [02:06<04:19, 6.74it/s]\n",
"{'loss': 0.2285, 'learning_rate': 1.3600000000000002e-05, 'epoch': 1.2}\n",
"{'loss': 0.1888, 'learning_rate': 1.2800000000000001e-05, 'epoch': 1.35}\n",
"{'loss': 0.2106, 'learning_rate': 1.2e-05, 'epoch': 1.5}\n",
" 40% 1000/2500 [02:43<03:41, 6.78it/s][INFO|trainer.py:725] 2023-02-14 21:51:51,748 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:51:51,749 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:51:51,750 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:51:51,750 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 28.08it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.96it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.63it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.99it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.68it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.48it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.32it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.23it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.15it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.05it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.05it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.03it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
"{'eval_loss': 0.19490236043930054, 'eval_accuracy': 0.9259999990463257, 'eval_runtime': 3.9658, 'eval_samples_per_second': 504.311, 'eval_steps_per_second': 21.181, 'epoch': 1.5}\n",
"\n",
" 40% 1000/2500 [02:46<03:41, 6.78it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:51:55,716 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:51:55,717 >> Configuration saved in out/emotion/gpt2/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:51:56,708 >> Model weights saved in out/emotion/gpt2/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:51:56,709 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:51:56,709 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.1906, 'learning_rate': 1.1200000000000001e-05, 'epoch': 1.65}\n",
"{'loss': 0.1793, 'learning_rate': 1.04e-05, 'epoch': 1.8}\n",
" 50% 1250/2500 [03:26<03:04, 6.76it/s][INFO|trainer.py:725] 2023-02-14 21:52:35,220 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:52:35,222 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:52:35,222 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:52:35,222 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.99it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.91it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.61it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 22.00it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.66it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.45it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.34it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.26it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.21it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.17it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.11it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.11it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.06it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.08it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.09it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.09it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
"{'eval_loss': 0.1607103943824768, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 3.9612, 'eval_samples_per_second': 504.895, 'eval_steps_per_second': 21.206, 'epoch': 1.87}\n",
"\n",
" 50% 1250/2500 [03:30<03:04, 6.76it/s]\n",
"{'loss': 0.2116, 'learning_rate': 9.600000000000001e-06, 'epoch': 1.95}\n",
"{'loss': 0.1536, 'learning_rate': 8.8e-06, 'epoch': 2.1}\n",
"{'loss': 0.1518, 'learning_rate': 8.000000000000001e-06, 'epoch': 2.25}\n",
" 60% 1500/2500 [04:07<02:26, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:53:15,831 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:53:15,833 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:53:15,833 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:53:15,833 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 28.10it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.90it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.58it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.53it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.37it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.27it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.19it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.13it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.11it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.04it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.94it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.94it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.98it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.93it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.98it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
"{'eval_loss': 0.160899356007576, 'eval_accuracy': 0.9330000281333923, 'eval_runtime': 3.9773, 'eval_samples_per_second': 502.855, 'eval_steps_per_second': 21.12, 'epoch': 2.25}\n",
"\n",
" 60% 1500/2500 [04:11<02:26, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:53:19,811 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:53:19,812 >> Configuration saved in out/emotion/gpt2/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:53:21,455 >> Model weights saved in out/emotion/gpt2/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:53:21,456 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:53:21,456 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.157, 'learning_rate': 7.2000000000000005e-06, 'epoch': 2.4}\n",
"{'loss': 0.141, 'learning_rate': 6.4000000000000006e-06, 'epoch': 2.55}\n",
" 70% 1750/2500 [04:51<01:50, 6.80it/s][INFO|trainer.py:725] 2023-02-14 21:54:00,007 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:54:00,009 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:54:00,009 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:54:00,009 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.89it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.82it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.49it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.48it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.31it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.20it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.99it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.98it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.98it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
"{'eval_loss': 0.15204769372940063, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 3.9769, 'eval_samples_per_second': 502.901, 'eval_steps_per_second': 21.122, 'epoch': 2.62}\n",
"\n",
" 70% 1750/2500 [04:55<01:50, 6.80it/s]\n",
"{'loss': 0.1426, 'learning_rate': 5.600000000000001e-06, 'epoch': 2.7}\n",
"{'loss': 0.1463, 'learning_rate': 4.800000000000001e-06, 'epoch': 2.85}\n",
"{'loss': 0.1403, 'learning_rate': 4.000000000000001e-06, 'epoch': 3.0}\n",
" 80% 2000/2500 [05:31<01:13, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:54:40,633 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:54:40,635 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:54:40,635 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:54:40,635 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.95it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.86it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.54it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.95it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.60it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.42it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.29it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.05it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.06it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.04it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.94it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
"{'eval_loss': 0.14609387516975403, 'eval_accuracy': 0.9290000200271606, 'eval_runtime': 3.9774, 'eval_samples_per_second': 502.846, 'eval_steps_per_second': 21.12, 'epoch': 3.0}\n",
"\n",
" 80% 2000/2500 [05:35<01:13, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:54:44,614 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:54:44,615 >> Configuration saved in out/emotion/gpt2/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:54:46,838 >> Model weights saved in out/emotion/gpt2/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:54:46,839 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:54:46,839 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2000/special_tokens_map.json\n",
"{'loss': 0.1256, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.15}\n",
"{'loss': 0.1246, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.3}\n",
" 90% 2250/2500 [06:16<00:36, 6.76it/s][INFO|trainer.py:725] 2023-02-14 21:55:25,309 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:55:25,311 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:55:25,311 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:55:25,311 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.89it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.86it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.52it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.87it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.57it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.29it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.18it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.15it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.09it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.06it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.06it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
"{'eval_loss': 0.15553689002990723, 'eval_accuracy': 0.9294999837875366, 'eval_runtime': 3.967, 'eval_samples_per_second': 504.158, 'eval_steps_per_second': 21.175, 'epoch': 3.37}\n",
"\n",
" 90% 2250/2500 [06:20<00:36, 6.76it/s]\n",
"{'loss': 0.1174, 'learning_rate': 1.6000000000000001e-06, 'epoch': 3.45}\n",
"{'loss': 0.1374, 'learning_rate': 8.000000000000001e-07, 'epoch': 3.6}\n",
"{'loss': 0.1207, 'learning_rate': 0.0, 'epoch': 3.75}\n",
"100% 2500/2500 [06:57<00:00, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:56:05,969 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:05,971 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:05,971 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:05,971 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.94it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.89it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.60it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.97it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.57it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.34it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.23it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.06it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.45it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.64it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.77it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.84it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.92it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
"{'eval_loss': 0.15162073075771332, 'eval_accuracy': 0.9309999942779541, 'eval_runtime': 3.9841, 'eval_samples_per_second': 501.992, 'eval_steps_per_second': 21.084, 'epoch': 3.75}\n",
"\n",
"100% 2500/2500 [07:01<00:00, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:56:09,956 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:56:09,957 >> Configuration saved in out/emotion/gpt2/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:56:10,953 >> Model weights saved in out/emotion/gpt2/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:56:10,954 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:56:10,954 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2500/special_tokens_map.json\n",
"[INFO|trainer.py:1852] 2023-02-14 21:56:12,777 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 21:56:12,778 >> Loading best model from out/emotion/gpt2/checkpoint-1500 (score: 0.9330000281333923).\n",
"{'train_runtime': 424.4983, 'train_samples_per_second': 141.343, 'train_steps_per_second': 5.889, 'train_loss': 0.351297896194458, 'epoch': 3.75}\n",
"100% 2500/2500 [07:04<00:00, 5.89it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:56:13,218 >> Saving model checkpoint to out/emotion/gpt2\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:56:13,220 >> Configuration saved in out/emotion/gpt2/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:56:14,063 >> Model weights saved in out/emotion/gpt2/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:56:14,064 >> tokenizer config file saved in out/emotion/gpt2/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:56:14,064 >> Special tokens file saved in out/emotion/gpt2/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 3.75\n",
" train_loss = 0.3513\n",
" train_runtime = 0:07:04.49\n",
" train_samples = 16000\n",
" train_samples_per_second = 141.343\n",
" train_steps_per_second = 5.889\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:14,169 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:14,170 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:14,170 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:14,170 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 21.20it/s]\n",
"***** eval metrics *****\n",
" epoch = 3.75\n",
" eval_accuracy = 0.933\n",
" eval_loss = 0.1609\n",
" eval_runtime = 0:00:04.02\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 497.496\n",
" eval_steps_per_second = 20.895\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:18,194 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:18,195 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:18,195 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:18,195 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 21.40it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:56:22,304 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9330000281333923}]}\n"
]
}
],
"source": [
"!python run_glue.py \\\n",
" --cache_dir gtp_cache_training \\\n",
" --model_name_or_path gpt2 \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/gpt2 \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
]
},
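  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal inference sketch for the checkpoint saved above (an illustrative assumption, not something executed as part of the training run): it loads the fine-tuned model from `out/emotion/gpt2` and maps the generic `LABEL_0`..`LABEL_5` ids back to the emotion names.\n",
    "\n",
    "```python\n",
    "from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline\n",
    "\n",
    "# hypothetical usage of the checkpoint produced by the cell above\n",
    "emotions = [\"sadness\", \"joy\", \"love\", \"anger\", \"fear\", \"surprise\"]\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"out/emotion/gpt2\")\n",
    "model = AutoModelForSequenceClassification.from_pretrained(\"out/emotion/gpt2\")\n",
    "clf = pipeline(\"text-classification\", model=model, tokenizer=tokenizer)\n",
    "\n",
    "pred = clf(\"i feel hopeful and powerful\")[0]  # e.g. {'label': 'LABEL_1', 'score': ...}\n",
    "print(emotions[int(pred[\"label\"].split(\"_\")[-1])], pred[\"score\"])\n",
    "```"
   ]
  },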
{
"cell_type": "markdown",
"source": [
"- full dataset\n",
"- custom head"
],
"metadata": {
"id": "zJeUGay5n1JW"
}
},
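  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The custom head itself is defined in the project's modified `run_glue.py` (class `GPT2ForSequenceClassificationCustom`). The sketch below is only an assumption of what such a head could look like, reconstructed from the freshly initialized parameter names reported in the training log (`score.dense_1_input`, `score.dense_1_hidden`, `score.dense_2`, `score.out_proj`); the layer sizes and activations are guesses.\n",
    "\n",
    "```python\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "\n",
    "class GPT2ClassificationHeadCustom(nn.Module):\n",
    "    # Hypothetical multi-layer head replacing the single `score` linear layer.\n",
    "    def __init__(self, hidden_size=768, num_labels=6, dropout=0.1):\n",
    "        super().__init__()\n",
    "        self.dense_1_input = nn.Linear(hidden_size, 2 * hidden_size)\n",
    "        self.dense_1_hidden = nn.Linear(hidden_size, 2 * hidden_size)\n",
    "        self.dense_2 = nn.Linear(4 * hidden_size, hidden_size)\n",
    "        self.dropout = nn.Dropout(dropout)\n",
    "        self.out_proj = nn.Linear(hidden_size, num_labels, bias=False)\n",
    "\n",
    "    def forward(self, last_hidden_state, input_embeds):\n",
    "        # combine transformer outputs with token embeddings, then project to the labels\n",
    "        x = torch.cat(\n",
    "            (self.dense_1_hidden(last_hidden_state), self.dense_1_input(input_embeds)), dim=-1\n",
    "        )\n",
    "        x = self.dropout(torch.relu(x))\n",
    "        x = self.dropout(torch.relu(self.dense_2(x)))\n",
    "        return self.out_proj(x)\n",
    "\n",
    "\n",
    "head = GPT2ClassificationHeadCustom()\n",
    "h = torch.randn(2, 128, 768)   # dummy hidden states (batch, seq, n_embd)\n",
    "print(head(h, h).shape)        # torch.Size([2, 128, 6])\n",
    "```"
   ]
  },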
{
"cell_type": "code",
"source": [
"!python run_glue.py \\\n",
" --cache_dir gtp_custom_cache_training \\\n",
" --model_name_or_path gpt2 \\\n",
" --custom_model gpt2_custom \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/gpt2_custom \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LXRMDiD-n1nG",
"outputId": "1383e6a3-b485-49a0-d111-05bea71acd23"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:56:25.884599: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:56:26.040127: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:56:26.823479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:56:26.823595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:56:26.823615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/gpt2_custom,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/gpt2_custom,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 14138.10it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2175.09it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 672.49it/s]\n",
"Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 123kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:30,068 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:30,068 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:56:30,162 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:30,251 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:30,252 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.18MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.90MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 14.3MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file vocab.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file merges.txt from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file tokenizer.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:31,525 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:31,526 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: False\n",
"INFO:__main__:Using implementation from class: GPT2ForSequenceClassificationCustom\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 108MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:56:36,895 >> loading weights file pytorch_model.bin from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 21:56:39,410 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
"\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:56:39,410 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_hidden.bias', 'score.dense_1_input.weight', 'score.dense_2.bias', 'score.dense_2.weight', 'score.out_proj.weight', 'score.dense_1_hidden.weight', 'score.dense_1_input.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"[ERROR|tokenization_utils_base.py:1019] 2023-02-14 21:56:39,418 >> Using pad_token, but it is not set yet.\n",
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 19.61ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.48ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.71ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:503] 2023-02-14 21:56:42,941 >> max_steps is given, it will override any value given in num_train_epochs\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:42,941 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:56:42,947 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:56:42,947 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:56:42,947 >> Num Epochs = 4\n",
"[INFO|trainer.py:1610] 2023-02-14 21:56:42,947 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:56:42,947 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:56:42,947 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:56:42,947 >> Total optimization steps = 2500\n",
"{'loss': 1.6218, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.15}\n",
"{'loss': 1.1593, 'learning_rate': 1.8400000000000003e-05, 'epoch': 0.3}\n",
" 10% 250/2500 [00:39<05:43, 6.56it/s][INFO|trainer.py:725] 2023-02-14 21:57:22,025 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:57:22,027 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:57:22,027 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:57:22,027 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.97it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 22.99it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.18it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.66it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.55it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.44it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.32it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.27it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
"{'eval_loss': 0.6981180310249329, 'eval_accuracy': 0.7329999804496765, 'eval_runtime': 4.1201, 'eval_samples_per_second': 485.426, 'eval_steps_per_second': 20.388, 'epoch': 0.37}\n",
"\n",
" 10% 250/2500 [00:43<05:43, 6.56it/s]\n",
"{'loss': 0.8016, 'learning_rate': 1.76e-05, 'epoch': 0.45}\n",
"{'loss': 0.5481, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.6}\n",
"{'loss': 0.4045, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.75}\n",
" 20% 500/2500 [01:21<05:03, 6.58it/s][INFO|trainer.py:725] 2023-02-14 21:58:04,246 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:58:04,248 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:58:04,248 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:58:04,248 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.97it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.02it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.20it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.19it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.20it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.21it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.23it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.23it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.24it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.25it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.24it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.24it/s]\u001b[A\n",
"{'eval_loss': 0.29522550106048584, 'eval_accuracy': 0.9100000262260437, 'eval_runtime': 4.1309, 'eval_samples_per_second': 484.153, 'eval_steps_per_second': 20.334, 'epoch': 0.75}\n",
"\n",
" 20% 500/2500 [01:25<05:03, 6.58it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:58:08,380 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:58:08,381 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:58:09,983 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:58:09,984 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:58:09,984 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.356, 'learning_rate': 1.5200000000000002e-05, 'epoch': 0.9}\n",
"{'loss': 0.2714, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.05}\n",
" 30% 750/2500 [02:07<04:25, 6.59it/s][INFO|trainer.py:725] 2023-02-14 21:58:49,972 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:58:49,973 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:58:49,974 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:58:49,974 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.11it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.25it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.89it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.67it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.56it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.48it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.42it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.37it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.32it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.27it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.31it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
"{'eval_loss': 0.22870442271232605, 'eval_accuracy': 0.9200000166893005, 'eval_runtime': 4.1118, 'eval_samples_per_second': 486.403, 'eval_steps_per_second': 20.429, 'epoch': 1.12}\n",
"\n",
" 30% 750/2500 [02:11<04:25, 6.59it/s]\n",
"{'loss': 0.2332, 'learning_rate': 1.3600000000000002e-05, 'epoch': 1.2}\n",
"{'loss': 0.2135, 'learning_rate': 1.2800000000000001e-05, 'epoch': 1.35}\n",
"{'loss': 0.2283, 'learning_rate': 1.2e-05, 'epoch': 1.5}\n",
" 40% 1000/2500 [02:49<03:48, 6.57it/s][INFO|trainer.py:725] 2023-02-14 21:59:32,169 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:59:32,170 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:59:32,170 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:59:32,171 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.03it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.07it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.17it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.84it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.62it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.52it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.36it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.33it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.28it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.14it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.18it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.31it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.25it/s]\u001b[A\n",
"{'eval_loss': 0.16501356661319733, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 4.1217, 'eval_samples_per_second': 485.232, 'eval_steps_per_second': 20.38, 'epoch': 1.5}\n",
"\n",
" 40% 1000/2500 [02:53<03:48, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:59:36,293 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:59:36,294 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:59:37,744 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:59:37,744 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:59:37,744 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.1836, 'learning_rate': 1.1200000000000001e-05, 'epoch': 1.65}\n",
"{'loss': 0.1844, 'learning_rate': 1.04e-05, 'epoch': 1.8}\n",
" 50% 1250/2500 [03:34<03:09, 6.59it/s][INFO|trainer.py:725] 2023-02-14 22:00:17,827 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:00:17,829 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:00:17,829 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:00:17,829 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.06it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.79it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.21it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.88it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.55it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.47it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.27it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.28it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.26it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.26it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
"{'eval_loss': 0.15909001231193542, 'eval_accuracy': 0.9355000257492065, 'eval_runtime': 4.1177, 'eval_samples_per_second': 485.712, 'eval_steps_per_second': 20.4, 'epoch': 1.87}\n",
"\n",
" 50% 1250/2500 [03:38<03:09, 6.59it/s]\n",
"{'loss': 0.2181, 'learning_rate': 9.600000000000001e-06, 'epoch': 1.95}\n",
"{'loss': 0.1695, 'learning_rate': 8.8e-06, 'epoch': 2.1}\n",
"{'loss': 0.1683, 'learning_rate': 8.000000000000001e-06, 'epoch': 2.25}\n",
" 60% 1500/2500 [04:17<02:32, 6.55it/s][INFO|trainer.py:725] 2023-02-14 22:00:59,986 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:00:59,988 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:00:59,988 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:00:59,988 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.10it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.06it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.79it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.16it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.52it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.45it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.24it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.11it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.12it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.17it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.19it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.20it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.20it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.20it/s]\u001b[A\n",
"{'eval_loss': 0.1472882628440857, 'eval_accuracy': 0.934499979019165, 'eval_runtime': 4.13, 'eval_samples_per_second': 484.258, 'eval_steps_per_second': 20.339, 'epoch': 2.25}\n",
"\n",
" 60% 1500/2500 [04:21<02:32, 6.55it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:01:04,119 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:01:04,120 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:01:05,576 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:01:05,576 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:01:05,576 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.1497, 'learning_rate': 7.2000000000000005e-06, 'epoch': 2.4}\n",
"{'loss': 0.1496, 'learning_rate': 6.4000000000000006e-06, 'epoch': 2.55}\n",
" 70% 1750/2500 [05:02<01:54, 6.54it/s][INFO|trainer.py:725] 2023-02-14 22:01:45,617 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:01:45,618 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:01:45,619 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:01:45,619 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.78it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 22.79it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.58it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.03it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.70it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.49it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.30it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.19it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.16it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.15it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.14it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.12it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.09it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.08it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.10it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.13it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.19it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.25it/s]\u001b[A\n",
"{'eval_loss': 0.14743593335151672, 'eval_accuracy': 0.9359999895095825, 'eval_runtime': 4.1413, 'eval_samples_per_second': 482.944, 'eval_steps_per_second': 20.284, 'epoch': 2.62}\n",
"\n",
" 70% 1750/2500 [05:06<01:54, 6.54it/s]\n",
"{'loss': 0.1465, 'learning_rate': 5.600000000000001e-06, 'epoch': 2.7}\n",
"{'loss': 0.1376, 'learning_rate': 4.800000000000001e-06, 'epoch': 2.85}\n",
"{'loss': 0.1444, 'learning_rate': 4.000000000000001e-06, 'epoch': 3.0}\n",
" 80% 2000/2500 [05:44<01:16, 6.57it/s][INFO|trainer.py:725] 2023-02-14 22:02:27,845 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:02:27,846 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:02:27,846 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:02:27,846 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.04it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.04it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.75it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.18it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.85it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.61it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.49it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.43it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.14it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.16it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.21it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.22it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.19it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.20it/s]\u001b[A\n",
"{'eval_loss': 0.14364145696163177, 'eval_accuracy': 0.9365000128746033, 'eval_runtime': 4.1279, 'eval_samples_per_second': 484.505, 'eval_steps_per_second': 20.349, 'epoch': 3.0}\n",
"\n",
" 80% 2000/2500 [05:49<01:16, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:02:31,975 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:02:31,976 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:02:33,429 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:02:33,430 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:02:33,430 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-2000/special_tokens_map.json\n",
"{'loss': 0.104, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.15}\n",
"{'loss': 0.1206, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.3}\n",
" 90% 2250/2500 [06:30<00:38, 6.55it/s][INFO|trainer.py:725] 2023-02-14 22:03:13,484 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:03:13,486 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:03:13,486 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:03:13,486 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.11it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.10it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.81it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.22it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.88it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.68it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.56it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.47it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.41it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.38it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.33it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.26it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.17it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.21it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.21it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
"{'eval_loss': 0.15543130040168762, 'eval_accuracy': 0.9369999766349792, 'eval_runtime': 4.1171, 'eval_samples_per_second': 485.782, 'eval_steps_per_second': 20.403, 'epoch': 3.37}\n",
"\n",
" 90% 2250/2500 [06:34<00:38, 6.55it/s]\n",
"{'loss': 0.1289, 'learning_rate': 1.6000000000000001e-06, 'epoch': 3.45}\n",
"{'loss': 0.1231, 'learning_rate': 8.000000000000001e-07, 'epoch': 3.6}\n",
"{'loss': 0.1179, 'learning_rate': 0.0, 'epoch': 3.75}\n",
"100% 2500/2500 [07:12<00:00, 6.57it/s][INFO|trainer.py:725] 2023-02-14 22:03:55,704 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:03:55,705 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:03:55,705 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:03:55,706 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.11it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.81it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.13it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.82it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.47it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.41it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.38it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.35it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.35it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.30it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.31it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
"{'eval_loss': 0.14437170326709747, 'eval_accuracy': 0.9350000023841858, 'eval_runtime': 4.116, 'eval_samples_per_second': 485.915, 'eval_steps_per_second': 20.408, 'epoch': 3.75}\n",
"\n",
"100% 2500/2500 [07:16<00:00, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:03:59,822 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:03:59,823 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:04:00,568 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:04:00,569 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:04:00,569 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-2500/special_tokens_map.json\n",
"[INFO|trainer.py:1852] 2023-02-14 22:04:02,582 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 22:04:02,582 >> Loading best model from out/emotion/gpt2_custom/checkpoint-2000 (score: 0.9365000128746033).\n",
"{'train_runtime': 440.0758, 'train_samples_per_second': 136.34, 'train_steps_per_second': 5.681, 'train_loss': 0.32335229415893557, 'epoch': 3.75}\n",
"100% 2500/2500 [07:20<00:00, 5.68it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 22:04:03,025 >> Saving model checkpoint to out/emotion/gpt2_custom\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:04:03,026 >> Configuration saved in out/emotion/gpt2_custom/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:04:03,965 >> Model weights saved in out/emotion/gpt2_custom/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:04:03,966 >> tokenizer config file saved in out/emotion/gpt2_custom/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:04:03,966 >> Special tokens file saved in out/emotion/gpt2_custom/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 3.75\n",
" train_loss = 0.3234\n",
" train_runtime = 0:07:20.07\n",
" train_samples = 16000\n",
" train_samples_per_second = 136.34\n",
" train_steps_per_second = 5.681\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 22:04:04,068 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:04:04,069 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:04,069 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:04,070 >> Batch size = 24\n",
"100% 84/84 [00:04<00:00, 20.35it/s]\n",
"***** eval metrics *****\n",
" epoch = 3.75\n",
" eval_accuracy = 0.9365\n",
" eval_loss = 0.1436\n",
" eval_runtime = 0:00:04.18\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 477.778\n",
" eval_steps_per_second = 20.067\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 22:04:08,259 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:04:08,260 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:08,260 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:08,260 >> Batch size = 24\n",
"100% 84/84 [00:04<00:00, 20.62it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 22:04:12,537 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9365000128746033}]}\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VrHmnOaT7ICl"
},
"source": [
"## **T5**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CmuDde477ICl"
},
"source": [
"- full data\n",
"- model `T5`\n",
"- sequnece length: 128\n",
"- training epoch: 1\n",
"- first few layers frozen"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"id": "2ruXjeqj7ICl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4d73b407-08c3-4007-aa32-c8709dd696fa"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 22:04:17.129470: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 22:04:17.281426: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 22:04:18.087509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 22:04:18.087605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 22:04:18.087624: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters Seq2SeqTrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"generation_max_length=128,\n",
"generation_num_beams=None,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=5e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/t5_v1_1/runs/Feb14_22-04-20_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/t5_v1_1,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=8,\n",
"per_device_train_batch_size=8,\n",
"predict_with_generate=True,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/t5_v1_1,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"sortish_sampler=False,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"WARNING:datasets.builder:Using custom data configuration default-a82ca4164dba097e\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 11848.32it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2097.85it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 953.83it/s]\n",
"Downloading (…)lve/main/config.json: 100% 537/537 [00:00<00:00, 97.0kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:20,972 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:20,975 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)okenizer_config.json: 100% 1.86k/1.86k [00:00<00:00, 853kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,160 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,160 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)ve/main/spiece.model: 100% 792k/792k [00:00<00:00, 10.2MB/s]\n",
"Downloading (…)cial_tokens_map.json: 100% 1.79k/1.79k [00:00<00:00, 705kB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file spiece.model from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file tokenizer.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file special_tokens_map.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file tokenizer_config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,838 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,838 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,888 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,889 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 308M/308M [00:03<00:00, 84.8MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 22:04:26,050 >> loading weights file pytorch_model.bin from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 22:04:27,048 >> All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
"\n",
"[INFO|modeling_utils.py:2614] 2023-02-14 22:04:27,048 >> All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at google/t5-v1_1-small.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('encoder.block.1.layer.0.SelfAttention.q.weight', False), ('encoder.block.1.layer.0.SelfAttention.k.weight', False), ('encoder.block.1.layer.0.SelfAttention.v.weight', False), ('encoder.block.1.layer.0.SelfAttention.o.weight', False), ('encoder.block.1.layer.0.layer_norm.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.1.layer.1.layer_norm.weight', False), ('encoder.block.2.layer.0.SelfAttention.q.weight', False), ('encoder.block.2.layer.0.SelfAttention.k.weight', False), ('encoder.block.2.layer.0.SelfAttention.v.weight', False), ('encoder.block.2.layer.0.SelfAttention.o.weight', False), ('encoder.block.2.layer.0.layer_norm.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.2.layer.1.layer_norm.weight', False), ('encoder.block.3.layer.0.SelfAttention.q.weight', False), ('encoder.block.3.layer.0.SelfAttention.k.weight', False), ('encoder.block.3.layer.0.SelfAttention.v.weight', False), ('encoder.block.3.layer.0.SelfAttention.o.weight', False), ('encoder.block.3.layer.0.layer_norm.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.3.layer.1.layer_norm.weight', False), ('encoder.block.4.layer.0.SelfAttention.q.weight', False), ('encoder.block.4.layer.0.SelfAttention.k.weight', False), ('encoder.block.4.layer.0.SelfAttention.v.weight', False), ('encoder.block.4.layer.0.SelfAttention.o.weight', False), ('encoder.block.4.layer.0.layer_norm.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.4.layer.1.layer_norm.weight', False), ('encoder.block.5.layer.0.SelfAttention.q.weight', False), ('encoder.block.5.layer.0.SelfAttention.k.weight', False), ('encoder.block.5.layer.0.SelfAttention.v.weight', False), ('encoder.block.5.layer.0.SelfAttention.o.weight', False), ('encoder.block.5.layer.0.layer_norm.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.5.layer.1.layer_norm.weight', False), ('encoder.block.6.layer.0.SelfAttention.q.weight', False), ('encoder.block.6.layer.0.SelfAttention.k.weight', False), ('encoder.block.6.layer.0.SelfAttention.v.weight', False), ('encoder.block.6.layer.0.SelfAttention.o.weight', False), ('encoder.block.6.layer.0.layer_norm.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.6.layer.1.layer_norm.weight', False), ('encoder.block.7.layer.0.SelfAttention.q.weight', False), ('encoder.block.7.layer.0.SelfAttention.k.weight', False), ('encoder.block.7.layer.0.SelfAttention.v.weight', False), ('encoder.block.7.layer.0.SelfAttention.o.weight', False), ('encoder.block.7.layer.0.layer_norm.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wi_0.weight', False), 
('encoder.block.7.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.7.layer.1.layer_norm.weight', False)] \n",
"\n",
"\n",
"INFO:__main__:Using translation prefix: \"emotion classification: \"\n",
"Running tokenizer on train dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-fa17416eabe18767.arrow\n",
"Running tokenizer on train dataset: 100% 16/16 [00:00<00:00, 23.64ba/s]\n",
"Running tokenizer on validation dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-c6cebbf9290f7df0.arrow\n",
"Running tokenizer on validation dataset: 100% 2/2 [00:00<00:00, 33.01ba/s]\n",
"Running tokenizer on prediction dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-988bff0993eee389.arrow\n",
"Running tokenizer on prediction dataset: 100% 2/2 [00:00<00:00, 33.06ba/s]\n",
"[INFO|trainer.py:503] 2023-02-14 22:04:30,902 >> max_steps is given, it will override any value given in num_train_epochs\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 22:04:30,911 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 22:04:30,911 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 22:04:30,911 >> Num Epochs = 2\n",
"[INFO|trainer.py:1610] 2023-02-14 22:04:30,911 >> Instantaneous batch size per device = 8\n",
"[INFO|trainer.py:1611] 2023-02-14 22:04:30,911 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n",
"[INFO|trainer.py:1612] 2023-02-14 22:04:30,911 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 22:04:30,911 >> Total optimization steps = 2500\n",
" 0% 0/2500 [00:00<?, ?it/s][WARNING|logging.py:281] 2023-02-14 22:04:30,925 >> You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
"{'loss': 21.5908, 'learning_rate': 4.8e-05, 'epoch': 0.05}\n",
"{'loss': 14.8264, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.1}\n",
" 10% 249/2500 [00:24<03:31, 10.64it/s][INFO|trainer.py:2907] 2023-02-14 22:04:55,366 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:55,366 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:55,366 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.87it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.90it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.84it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.48it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.16it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.99it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.61it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.67it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.63it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.67it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.49it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.42it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.46it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.50it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.57it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.68it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.70it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.71it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.65it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.71it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.73it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.58it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.50it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.56it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.58it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:10, 14.39it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:10, 14.35it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.47it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.45it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.40it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.44it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.52it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.53it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.55it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.66it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.78it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.78it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.73it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.79it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.61it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.37it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.37it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.45it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.46it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.55it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.56it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.60it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.21it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.41it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.53it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.60it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.72it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.74it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.80it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.86it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.80it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.83it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.78it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.78it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.79it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.86it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.89it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.83it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.80it/s]\u001b[A\n",
" 10% 250/2500 [00:39<03:31, 10.64it/s]\n",
" 90% 226/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.81it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.86it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.84it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.77it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
"{'eval_loss': 9.001160621643066, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2175, 'eval_samples_per_second': 116.161, 'eval_steps_per_second': 14.52, 'epoch': 0.12}\n",
"\n",
" 10% 250/2500 [00:41<03:31, 10.64it/s]\n",
"{'loss': 10.5792, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.15}\n",
"{'loss': 7.8113, 'learning_rate': 4.2e-05, 'epoch': 0.2}\n",
"{'loss': 5.2658, 'learning_rate': 4e-05, 'epoch': 0.25}\n",
" 20% 500/2500 [01:05<03:04, 10.83it/s][INFO|trainer.py:2907] 2023-02-14 22:05:35,963 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:05:35,963 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:05:35,963 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 22.27it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.12it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.18it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.53it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.20it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.89it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.91it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.77it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.79it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.68it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.68it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.63it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.74it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.76it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.84it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.68it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.79it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.74it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.77it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.80it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:09, 14.81it/s]\u001b[A\n",
" 42% 104/250 [00:06<00:09, 14.81it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.86it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.87it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.87it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.85it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.73it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.74it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.77it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.49it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.60it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.63it/s]\u001b[A\n",
" 59% 148/250 [00:09<00:06, 14.62it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.61it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.75it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.67it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.68it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.61it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.58it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.64it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.60it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.49it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.47it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.53it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.57it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.58it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.64it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.30it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.43it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.54it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:02, 14.68it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.75it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.77it/s]\u001b[A\n",
" 20% 500/2500 [01:19<03:04, 10.83it/s]\n",
" 87% 218/250 [00:14<00:02, 14.84it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.87it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.79it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.69it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.64it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.54it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.60it/s]\u001b[A\n",
" 94% 236/250 [00:15<00:00, 14.66it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.76it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.76it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
"{'eval_loss': 2.1697170734405518, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.1551, 'eval_samples_per_second': 116.584, 'eval_steps_per_second': 14.573, 'epoch': 0.25}\n",
"\n",
" 20% 500/2500 [01:22<03:04, 10.83it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:05:53,119 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:05:53,120 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:05:53,749 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:05:53,750 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:05:53,750 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:05:53,788 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-500/spiece.model\n",
"{'loss': 3.7795, 'learning_rate': 3.8e-05, 'epoch': 0.3}\n",
"{'loss': 2.9169, 'learning_rate': 3.6e-05, 'epoch': 0.35}\n",
" 30% 749/2500 [01:47<02:43, 10.71it/s][INFO|trainer.py:2907] 2023-02-14 22:06:18,135 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:06:18,136 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:06:18,136 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.21it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.54it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.62it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:16, 14.78it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.60it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.53it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:16, 14.44it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.57it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 12% 30/250 [00:02<00:15, 14.66it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.67it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.58it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.65it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.85it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.84it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.80it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:12, 14.77it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.81it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.76it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.71it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.64it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.71it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.74it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.76it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.75it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.72it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.70it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.72it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.66it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.70it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.63it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.71it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.73it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.41it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.46it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.51it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.60it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.50it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.53it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:07, 14.55it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.53it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.48it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.54it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.46it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.42it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.38it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.38it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.32it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.33it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.23it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.23it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.24it/s]\u001b[A\n",
" 70% 176/250 [00:12<00:05, 14.21it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:05, 14.17it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.16it/s]\u001b[A\n",
" 30% 750/2500 [01:59<02:43, 10.71it/s]\n",
" 74% 184/250 [00:12<00:04, 14.30it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.40it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.40it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.48it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.56it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.62it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.68it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.72it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.71it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.75it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.73it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.65it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.64it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.26it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.42it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.45it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.54it/s]\u001b[A\n",
"{'eval_loss': 1.4527522325515747, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2954, 'eval_samples_per_second': 115.638, 'eval_steps_per_second': 14.455, 'epoch': 0.38}\n",
"\n",
" 30% 750/2500 [02:04<02:43, 10.71it/s]\n",
"{'loss': 2.4516, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.4}\n",
"{'loss': 2.2293, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.45}\n",
"{'loss': 2.0123, 'learning_rate': 3e-05, 'epoch': 0.5}\n",
" 40% 1000/2500 [02:27<02:21, 10.63it/s][INFO|trainer.py:2907] 2023-02-14 22:06:58,636 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:06:58,636 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:06:58,636 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:12, 20.13it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:15, 16.26it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.45it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.09it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:16, 14.85it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.66it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.56it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.77it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.88it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.83it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:14, 14.94it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:14, 14.94it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.96it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.80it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.82it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.73it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.19it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.44it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.79it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.79it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.83it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.89it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:11, 14.88it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.83it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.83it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.83it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.81it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.78it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.78it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.85it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.79it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.70it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.72it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.79it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.72it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.84it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.82it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.85it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.85it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.85it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.87it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.88it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.81it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.79it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.81it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.84it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.84it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.83it/s]\u001b[A\n",
" 59% 148/250 [00:09<00:06, 14.83it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.76it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.77it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.77it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.80it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.70it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.68it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.75it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.82it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.87it/s]\u001b[A\n",
" 40% 1000/2500 [02:39<02:21, 10.63it/s]\n",
" 71% 178/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.65it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.61it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.63it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.72it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.67it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.74it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.64it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.69it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 94% 236/250 [00:15<00:00, 14.73it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.87it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.88it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.90it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.91it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.90it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.92it/s]\u001b[A\n",
"{'eval_loss': 1.160749912261963, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.1471, 'eval_samples_per_second': 116.638, 'eval_steps_per_second': 14.58, 'epoch': 0.5}\n",
"\n",
" 40% 1000/2500 [02:44<02:21, 10.63it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:07:15,784 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:07:15,785 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:07:16,414 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:07:16,415 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:07:16,416 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-1000/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:07:16,453 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-1000/spiece.model\n",
"{'loss': 1.9003, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.55}\n",
"{'loss': 1.7884, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.6}\n",
" 50% 1249/2500 [03:09<01:59, 10.49it/s][INFO|trainer.py:2907] 2023-02-14 22:07:40,879 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:07:40,879 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:07:40,879 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.99it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.06it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 16.09it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.50it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.84it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.74it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.69it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.74it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.73it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.71it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.62it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.56it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.75it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.73it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.68it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.63it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.74it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.68it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.64it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.68it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.29it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.38it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:12, 14.47it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.72it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.75it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.64it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.67it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.71it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.84it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:09, 14.50it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.51it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.63it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.69it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.67it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.62it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.60it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.58it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.55it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.58it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.57it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.59it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.66it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.53it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.60it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.52it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.50it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.49it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.47it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.37it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:05, 14.29it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.27it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.25it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.27it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.24it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.18it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.22it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:04, 14.16it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.21it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.22it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.27it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.28it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.16it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.06it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.05it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.06it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.06it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 13.87it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.01it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.22it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.28it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.42it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.39it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.35it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.49it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.57it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.65it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 94% 234/250 [00:16<00:01, 14.73it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.74it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.80it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.83it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.81it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.72it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.63it/s]\u001b[A\n",
"{'eval_loss': 1.0410572290420532, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.3319, 'eval_samples_per_second': 115.394, 'eval_steps_per_second': 14.424, 'epoch': 0.62}\n",
"\n",
" 50% 1250/2500 [03:27<01:59, 10.49it/s]\n",
"{'loss': 1.7415, 'learning_rate': 2.4e-05, 'epoch': 0.65}\n",
"{'loss': 1.6231, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.7}\n",
"{'loss': 1.5278, 'learning_rate': 2e-05, 'epoch': 0.75}\n",
" 60% 1500/2500 [03:50<01:33, 10.71it/s][INFO|trainer.py:2907] 2023-02-14 22:08:21,432 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:08:21,433 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:08:21,433 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.79it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.88it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.94it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.36it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 14.98it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.72it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.47it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:16, 14.40it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.48it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.54it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.57it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.52it/s]\u001b[A\n",
" 12% 30/250 [00:02<00:15, 14.53it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:15, 14.51it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.51it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.51it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.58it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.66it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.75it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.72it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:11, 14.74it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.59it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.80it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.82it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.80it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.81it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.74it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.80it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.79it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.76it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.82it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.82it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.72it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.73it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.78it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.78it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.85it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.82it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.85it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.89it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.85it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.56it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.78it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.85it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.87it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.91it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.91it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.54it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.59it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.50it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.49it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.66it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.57it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.56it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.58it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.56it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.55it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.49it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.42it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.39it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.40it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.35it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.37it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.43it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.44it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.44it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.48it/s]\u001b[A\n",
"{'eval_loss': 0.9458380341529846, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.379, 'eval_samples_per_second': 115.081, 'eval_steps_per_second': 14.385, 'epoch': 0.75}\n",
"\n",
" 60% 1500/2500 [04:07<01:33, 10.71it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:08:38,813 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:08:38,814 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:08:39,285 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:08:39,286 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:08:39,286 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-1500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:08:39,322 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-1500/spiece.model\n",
"{'loss': 1.4835, 'learning_rate': 1.8e-05, 'epoch': 0.8}\n",
"{'loss': 1.449, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.85}\n",
" 70% 1749/2500 [04:32<01:10, 10.61it/s][INFO|trainer.py:2907] 2023-02-14 22:09:03,363 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:09:03,363 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:09:03,363 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 22.10it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.10it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.16it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.48it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.17it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.90it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.59it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.58it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.59it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.60it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:15, 14.49it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.45it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.39it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.44it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.41it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.46it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.46it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.47it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.51it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.65it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.75it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.58it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.53it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.61it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.75it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.83it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.77it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.76it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.68it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.60it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.58it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.63it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.67it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.66it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.66it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.66it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.52it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.55it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.62it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.65it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.71it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.54it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.61it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.66it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.62it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.77it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.79it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.50it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.42it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.48it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.52it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.56it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.53it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.61it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.75it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.71it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.72it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.71it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.59it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.57it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.61it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.67it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.66it/s]\u001b[A\n",
"{'eval_loss': 0.8559792637825012, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2321, 'eval_samples_per_second': 116.063, 'eval_steps_per_second': 14.508, 'epoch': 0.88}\n",
"\n",
" 70% 1750/2500 [04:49<01:10, 10.61it/s]\n",
"{'loss': 1.4421, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.9}\n",
"{'loss': 1.3835, 'learning_rate': 1.2e-05, 'epoch': 0.95}\n",
"{'loss': 1.325, 'learning_rate': 1e-05, 'epoch': 1.0}\n",
" 80% 2000/2500 [05:12<00:45, 10.89it/s][INFO|trainer.py:2907] 2023-02-14 22:09:43,863 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:09:43,863 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:09:43,863 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.99it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.18it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.14it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.55it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.22it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.01it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.84it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.87it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.46it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.50it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.65it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.72it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.52it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.60it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.40it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.43it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.46it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.51it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.50it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.44it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.49it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.50it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.52it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.53it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:12, 14.50it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:12, 14.33it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.36it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.41it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.37it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.42it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.55it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.57it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.63it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.64it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.57it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.60it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.63it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.57it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.68it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.52it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.52it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.51it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.72it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.76it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.81it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.87it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.90it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.70it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.73it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.82it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.86it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.87it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.91it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.53it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.56it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.57it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.63it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.72it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.70it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.61it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.59it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.53it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.63it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.67it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.76it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.74it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.60it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.47it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.53it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.61it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.66it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
" 80% 2000/2500 [05:29<00:45, 10.89it/s]\n",
" 99% 248/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.62it/s]\u001b[A\n",
"{'eval_loss': 0.8163257241249084, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2395, 'eval_samples_per_second': 116.013, 'eval_steps_per_second': 14.502, 'epoch': 1.0}\n",
"\n",
" 80% 2000/2500 [05:30<00:45, 10.89it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:10:01,104 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:10:01,105 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:10:01,585 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:10:01,586 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:10:01,586 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-2000/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:10:01,623 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-2000/spiece.model\n",
"{'loss': 1.2708, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.05}\n",
"{'loss': 1.3351, 'learning_rate': 6e-06, 'epoch': 1.1}\n",
" 90% 2249/2500 [05:54<00:23, 10.80it/s][INFO|trainer.py:2907] 2023-02-14 22:10:25,736 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:10:25,736 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:10:25,736 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.89it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.90it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 16.04it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.53it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.20it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.99it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.90it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.80it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.78it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.56it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.72it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.53it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.45it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.55it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.57it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.53it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.45it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.50it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.57it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.41it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.43it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.54it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.54it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.48it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.39it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.50it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.49it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.54it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.62it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.59it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.60it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.63it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.66it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.77it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.76it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.82it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:08, 14.79it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.83it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.77it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.80it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.61it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.75it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.72it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.69it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.69it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.77it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.79it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.73it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.73it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.79it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.80it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.70it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.66it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.76it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.79it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.72it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.68it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.31it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.45it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.61it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.60it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 90% 2250/2500 [06:09<00:23, 10.80it/s]\n",
" 88% 220/250 [00:14<00:02, 14.71it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.73it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.84it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.80it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.66it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.68it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.59it/s]\u001b[A\n",
"{'eval_loss': 0.8037287592887878, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2062, 'eval_samples_per_second': 116.237, 'eval_steps_per_second': 14.53, 'epoch': 1.12}\n",
"\n",
" 90% 2250/2500 [06:12<00:23, 10.80it/s]\n",
"{'loss': 1.2308, 'learning_rate': 4.000000000000001e-06, 'epoch': 1.15}\n",
"{'loss': 1.376, 'learning_rate': 2.0000000000000003e-06, 'epoch': 1.2}\n",
"{'loss': 1.2416, 'learning_rate': 0.0, 'epoch': 1.25}\n",
"100% 2500/2500 [06:35<00:00, 10.84it/s][INFO|trainer.py:2907] 2023-02-14 22:11:06,282 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:06,283 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:06,283 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.34it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.78it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.85it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.37it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.91it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.80it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.78it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.67it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.60it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.63it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.67it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.68it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.62it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.67it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.73it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.82it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.79it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.53it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.46it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.47it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.48it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.74it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.68it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.61it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.62it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.56it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.59it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.38it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.33it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.30it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.35it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.40it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:10, 14.40it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:10, 14.36it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.27it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.36it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.34it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.33it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.31it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.35it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:09, 14.41it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.47it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.50it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.56it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 53% 132/250 [00:09<00:08, 14.59it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.76it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.67it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.75it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.59it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.66it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.65it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.71it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 70% 176/250 [00:12<00:05, 14.71it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.56it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.55it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.62it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.63it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.71it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.73it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.60it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.59it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.49it/s]\u001b[A\n",
"100% 2500/2500 [06:49<00:00, 10.84it/s]\n",
" 85% 212/250 [00:14<00:02, 14.53it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.51it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.54it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.56it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.67it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.66it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.78it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.72it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.74it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.65it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.64it/s]\u001b[A\n",
"{'eval_loss': 0.7921838760375977, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2721, 'eval_samples_per_second': 115.794, 'eval_steps_per_second': 14.474, 'epoch': 1.25}\n",
"\n",
"100% 2500/2500 [06:52<00:00, 10.84it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:11:23,556 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:11:23,557 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:11:24,033 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:11:24,034 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:11:24,034 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-2500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:11:24,070 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-2500/spiece.model\n",
"[INFO|trainer.py:1852] 2023-02-14 22:11:24,853 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 22:11:24,854 >> Loading best model from out/emotion/t5_v1_1/checkpoint-500 (score: 1.0).\n",
"{'train_runtime': 414.2608, 'train_samples_per_second': 48.279, 'train_steps_per_second': 6.035, 'train_loss': 3.8232721221923827, 'epoch': 1.25}\n",
"100% 2500/2500 [06:54<00:00, 6.03it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 22:11:25,173 >> Saving model checkpoint to out/emotion/t5_v1_1\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:11:25,174 >> Configuration saved in out/emotion/t5_v1_1/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:11:25,662 >> Model weights saved in out/emotion/t5_v1_1/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:11:25,663 >> tokenizer config file saved in out/emotion/t5_v1_1/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:11:25,663 >> Special tokens file saved in out/emotion/t5_v1_1/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:11:25,703 >> Copy vocab file to out/emotion/t5_v1_1/spiece.model\n",
"***** train metrics *****\n",
" epoch = 1.25\n",
" train_loss = 3.8233\n",
" train_runtime = 0:06:54.26\n",
" train_samples = 16000\n",
" train_samples_per_second = 48.279\n",
" train_steps_per_second = 6.035\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:2907] 2023-02-14 22:11:25,713 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:25,713 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:25,713 >> Batch size = 8\n",
"100% 250/250 [00:17<00:00, 14.50it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.25\n",
" eval_accuracy = 1.0\n",
" eval_bleu = 0.0\n",
" eval_gen_len = 2.0\n",
" eval_loss = 2.1697\n",
" eval_runtime = 0:00:17.31\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 115.494\n",
" eval_steps_per_second = 14.437\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:2907] 2023-02-14 22:11:43,033 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:43,033 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:43,034 >> Batch size = 8\n",
"100% 250/250 [00:17<00:00, 14.58it/s]\n",
"***** predict metrics *****\n",
" predict_accuracy = 1.0\n",
" predict_bleu = 0.0\n",
" predict_gen_len = 2.0\n",
" predict_loss = 2.1029\n",
" predict_runtime = 0:00:17.21\n",
" predict_samples = 2000\n",
" predict_samples_per_second = 116.158\n",
" predict_steps_per_second = 14.52\n",
"[INFO|modelcard.py:444] 2023-02-14 22:12:00,417 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Translation', 'type': 'translation'}, 'metrics': [{'name': 'Bleu', 'type': 'bleu', 'value': 0.0}, {'name': 'Accuracy', 'type': 'accuracy', 'value': 1.0}]}\n"
]
}
],
"source": [
"!python run_translation.py \\\n",
" --cache_dir t5_cache_training \\\n",
" --model_name_or_path \"google/t5-v1_1-small\" \\\n",
" --train_file data/s2s-train.json \\\n",
" --validation_file data/s2s-valid.json \\\n",
" --test_file data/s2s-test.json \\\n",
" --per_device_train_batch_size 8 \\\n",
" --per_device_eval_batch_size 8 \\\n",
" --source_lang \"text\" \\\n",
" --target_lang \"label\" \\\n",
" --source_prefix \"emotion classification\" \\\n",
" --max_source_length 256 \\\n",
" --max_target_length 128 \\\n",
" --generation_max_length 128 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --predict_with_generate \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/t5_v1_1 \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
]
},
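{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of how the fine-tuned checkpoint written to `out/emotion/t5_v1_1` could be queried directly. The checkpoint path and the `emotion classification` prefix come from the command above; the example sentence, the use of `max_new_tokens`, and the exact prefix/text concatenation are assumptions, so treat this as an illustration rather than part of the training pipeline."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: load the fine-tuned seq2seq checkpoint saved by run_translation.py\n",
"# above and generate a label for a single, made-up sentence.\n",
"import torch\n",
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
"\n",
"ckpt = 'out/emotion/t5_v1_1'  # --output_dir used above\n",
"tokenizer = AutoTokenizer.from_pretrained(ckpt)\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(ckpt)\n",
"\n",
"# 'emotion classification' mirrors --source_prefix; whether a separating space is\n",
"# needed depends on how run_translation.py concatenates prefix and text.\n",
"text = 'emotion classification' + ' i feel great about how this project turned out'\n",
"inputs = tokenizer(text, return_tensors='pt')\n",
"with torch.no_grad():\n",
"    output_ids = model.generate(**inputs, max_new_tokens=8)\n",
"# Expected to decode to one of: sadness, joy, love, anger, fear, surprise\n",
"print(tokenizer.decode(output_ids[0], skip_special_tokens=True))"
]
},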
{
"cell_type": "markdown",
"metadata": {
"id": "XyC_7Ov07ICm"
},
"source": [
"# **FLAN T5**"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "nX6LOzsF7ICm"
},
"outputs": [],
"source": [
"from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "EEuIugWA7ICm"
},
"outputs": [],
"source": [
"if torch.cuda.is_available():\n",
" device = 0\n",
"else:\n",
" device = -1"
]
},
{
"cell_type": "code",
"source": [
"def perform_shot_learning(pipeline_type, model_name, test_file):\n",
" class_type = AutoModelForSeq2SeqLM\n",
" model = class_type.from_pretrained(model_name, torch_dtype=torch.float32)\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"\n",
" our_pipeline = pipeline(pipeline_type, model=model, tokenizer=tokenizer, device=device)\n",
"\n",
" correct = 0\n",
"\n",
" labels = \"possible labels: sadness, joy, love, anger, fear, surprise\"\n",
"\n",
" with open(test_file) as f:\n",
" f_lines = f.readlines()\n",
" for line in f_lines:\n",
" ex = json.loads(line)\n",
" prompt = ex['text']\n",
"\n",
" tmp = labels + '\\n' + f'text: {prompt}' + '\\n' + 'label: '\n",
" \n",
" predict = our_pipeline(tmp, do_sample=False)[0]['generated_text']\n",
"\n",
" if predict == ex['label']:\n",
" correct += 1\n",
"\n",
" print(f'Accuracy: {correct/len(f_lines)}')"
],
"metadata": {
"id": "AtDz85GKalzg"
},
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"source": [
"test_ds = 'data/s2s-test.json'"
],
"metadata": {
"id": "q9-4fzxpaoff"
},
"execution_count": 16,
"outputs": []
},
{
"cell_type": "code",
"source": [
"perform_shot_learning('text2text-generation', 'google/flan-t5-large', test_ds)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 219,
"referenced_widgets": [
"18f03144f5194bd2a88064eaae1140f0",
"36b8333766d44ee2aaa8da8ee75975d2",
"f7a9b125cf1346468e428abd689ff800",
"9b9e6161874f41c98d5c5e55d8d4fc86",
"9925a6f17ba14eee96332f0ea1dc88e5",
"dce64adfb8334591a8ce182918ecb4e3",
"9efd8cd2208245aca3f369f0735e2ee1",
"3d05704ffb0040c8b5bfb5c068c3329b",
"9564dcdd10c64072bb09e70def311ff3",
"f406c9b52a274068bd636554558497b2",
"d97be50f8cc64f8680a6cce112863255",
"7d6b88e56dad4dcbb0f1b1720f1ff118",
"eabc78cbdeef40feb36cf90fdbcdfbc7",
"6477d99dffbc4cf39e2c6998f71e37f7",
"d63511a8852942309cabe53720939fcc",
"3096b59f64eb48659a8eedea5a171be4",
"acc58b06f3b54801b10ee872fab39e6e",
"c2bd9c9ddab848529e52adfdc7634044",
"8d7e8c29d7e247f1b55d329d40508526",
"457b70adcab0464c9f990b13f433c635",
"0858fe327ec549b488f6169de1d84654",
"e18a505153c7491f8900142fb1189cd7",
"945026e5e11448b39ab37fb2a0bd963c",
"8c3aa97d58cb4f21b59af6253c952859",
"848ff807a83c4a79a1b3d7d80c29499c",
"a7b1f6722fcd4e90811041b24df0fe7b",
"f815d05091814c39a467cd8f528db504",
"915449ab41d848d39d801b4feb932a4f",
"2937b015455647abb7a524f858a881d2",
"c2b6cda9a8e94f7e97d7fb032b8e2bc5",
"af885a022ad743098e5037e1c8dc760a",
"088ec36aff7f415abfc4fd926fa0f902",
"b1b99d863dc64208afc11416d4936c2c",
"cb9e02be7ec44f6bb6b8771691c114e4",
"f68a247bddf9484e9f7b1666802f4612",
"d8d89ac972084304bff515a16e009452",
"3495b00846ae49acbb0cf3e15edf361e",
"60f6f23e78ce4ee2abf7389ab936c3ac",
"9d428e02c4134510baf179ce9137d90c",
"5298f4cd4e2e404ea66d70c62bcfe439",
"cd9fdc3eb94a4d00b5af6115318dcf45",
"d664c674a977456cad109347c0206d0e",
"17e5dedc0aeb4a1da32113e51158fd74",
"9b70ec9f110f4080a6a26fd12044fe94"
]
},
"id": "7fWzF9PVatgL",
"outputId": "6c37c046-a14c-4cab-e285-fa1ddfeb3241"
},
"execution_count": 17,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)okenizer_config.json: 0%| | 0.00/2.54k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "18f03144f5194bd2a88064eaae1140f0"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)\"spiece.model\";: 0%| | 0.00/792k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "7d6b88e56dad4dcbb0f1b1720f1ff118"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)/main/tokenizer.json: 0%| | 0.00/2.42M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "945026e5e11448b39ab37fb2a0bd963c"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)cial_tokens_map.json: 0%| | 0.00/2.20k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "cb9e02be7ec44f6bb6b8771691c114e4"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.8/dist-packages/transformers/pipelines/base.py:1043: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
" warnings.warn(\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy: 0.647\n"
]
}
]
},
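{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `UserWarning` above points out that the pipeline is called once per example on the GPU. Below is a minimal sketch of a batched variant of the same zero-shot evaluation; the `batched_shot_learning` name, the default `batch_size`, and the handling of the pipeline's return format are assumptions, not part of the original experiment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: pass all prompts to the pipeline at once so it can batch\n",
"# generation on the GPU, instead of one call per example.\n",
"# Reuses the `device` variable defined earlier in this section.\n",
"import json\n",
"from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM\n",
"\n",
"def batched_shot_learning(model_name, test_file, batch_size=16):\n",
"    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n",
"    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"    pipe = pipeline('text2text-generation', model=model, tokenizer=tokenizer, device=device)\n",
"\n",
"    labels = 'possible labels: sadness, joy, love, anger, fear, surprise'\n",
"    with open(test_file) as f:\n",
"        examples = [json.loads(line) for line in f]\n",
"    prompts = [labels + '\\n' + f\"text: {ex['text']}\" + '\\n' + 'label: ' for ex in examples]\n",
"\n",
"    # A list input lets the pipeline batch internally.\n",
"    predictions = pipe(prompts, do_sample=False, batch_size=batch_size)\n",
"\n",
"    correct = 0\n",
"    for ex, pred in zip(examples, predictions):\n",
"        # Depending on the transformers version, each result is either a dict\n",
"        # or a single-element list of dicts.\n",
"        generated = pred[0]['generated_text'] if isinstance(pred, list) else pred['generated_text']\n",
"        if generated == ex['label']:\n",
"            correct += 1\n",
"    print(f'Accuracy: {correct/len(examples)}')\n",
"\n",
"# Example call with the same model and test set as above:\n",
"# batched_shot_learning('google/flan-t5-large', test_ds)"
]
},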
{
"cell_type": "code",
"source": [
"!zip -r /content/projekt.zip /content/"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mJfe_hnJ_qVC",
"outputId": "ebdda236-1053-4b29-809d-7be9247edf19"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" adding: content/ (stored 0%)\n",
" adding: content/.config/ (stored 0%)\n",
" adding: content/.config/config_sentinel (stored 0%)\n",
" adding: content/.config/logs/ (stored 0%)\n",
" adding: content/.config/logs/2023.02.10/ (stored 0%)\n",
" adding: content/.config/logs/2023.02.10/14.32.38.026074.log (deflated 58%)\n",
" adding: content/.config/logs/2023.02.10/14.33.38.691407.log (deflated 56%)\n",
" adding: content/.config/logs/2023.02.10/14.33.11.427170.log (deflated 58%)\n",
" adding: content/.config/logs/2023.02.10/14.33.37.863925.log (deflated 57%)\n",
" adding: content/.config/logs/2023.02.10/14.32.12.281772.log (deflated 91%)\n",
" adding: content/.config/logs/2023.02.10/14.33.03.230973.log (deflated 86%)\n",
" adding: content/.config/gce (stored 0%)\n",
" adding: content/.config/.last_survey_prompt.yaml (stored 0%)\n",
" adding: content/.config/configurations/ (stored 0%)\n",
" adding: content/.config/configurations/config_default (deflated 15%)\n",
" adding: content/.config/active_config (stored 0%)\n",
" adding: content/.config/.last_update_check.json (deflated 22%)\n",
" adding: content/.config/.last_opt_in_prompt.yaml (stored 0%)\n",
" adding: content/__pycache__/ (stored 0%)\n",
" adding: content/__pycache__/roberta.cpython-38.pyc (deflated 62%)\n",
" adding: content/__pycache__/gpt2.cpython-38.pyc (deflated 53%)\n",
" adding: content/data/ (stored 0%)\n",
" adding: content/data/.ipynb_checkpoints/ (stored 0%)\n",
" adding: content/data/test.json (deflated 69%)\n",
" adding: content/data/s2s-test.json (deflated 70%)\n",
" adding: content/data/s2s-valid.json (deflated 70%)\n",
" adding: content/data/valid.json (deflated 69%)\n",
" adding: content/data/s2s-train.json (deflated 70%)\n",
" adding: content/data/train.json (deflated 69%)\n",
" adding: content/req.txt (deflated 30%)\n",
" adding: content/.cache_training_roberta/ (stored 0%)\n",
" adding: content/.cache_training_roberta/.cache_training_roberta_json_default-1808ac39383e9432_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/json/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-ff4234a2fb1a9582.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-6bbf8957e5f0cf7b.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-5efe26f1bca5cac0.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21 (deflated 63%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311 (deflated 59%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957 (deflated 47%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/278b7a95739c4392fae9b818bb5343dde20be1b89318f37a6d939e1e1b9e461b (deflated 41%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/refs/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/refs/main (deflated 3%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/tokenizer_config.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/added_tokens.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/special_tokens_map.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json (deflated 47%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json (deflated 59%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt (deflated 53%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json (deflated 63%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin (deflated 41%)\n",
" adding: content/cache_training_t5/ (stored 0%)\n",
" adding: content/cache_training_t5/cache_training_t5_json_default-25a5883a4a222bad_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/4e28ff6ebdf584f5372d9de68867399142435d9a (deflated 48%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/b114c318caf72f6e89ea92e0755c41327a453198 (deflated 82%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/07b81619b82546ab7f30e06c9615c7fca8fe3abd (deflated 44%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/881bdbffc06e471924ecea57f962bc5f8e2a9f21 (deflated 83%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/7c9a3e998a8c74b52484f3a1ccfdcc9767972ee6b34ae7a527cdf6f972a34163 (deflated 53%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/refs/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/refs/main (deflated 5%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer.json (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/added_tokens.json (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json (deflated 44%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json (deflated 82%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model (deflated 48%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json (deflated 83%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin (deflated 53%)\n",
" adding: content/cache_training_t5/json/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bef49b953c77fdf0.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-105206b5fd478147.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-040b968aed3576f7.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-f37cf2f406b18541.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 58%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-b0aef076d30fe2f7.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/run_glue.py (deflated 73%)\n",
" adding: content/run_translation.py (deflated 74%)\n",
" adding: content/roberta_custom_training_cache/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/roberta_custom_training_cache_json_default-01aa9d8252a24a0d_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21 (deflated 63%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311 (deflated 59%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957 (deflated 47%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/278b7a95739c4392fae9b818bb5343dde20be1b89318f37a6d939e1e1b9e461b (deflated 41%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/refs/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/refs/main (deflated 3%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/tokenizer_config.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/added_tokens.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/special_tokens_map.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json (deflated 47%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json (deflated 59%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt (deflated 53%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json (deflated 63%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin (deflated 41%)\n",
" adding: content/gtp_cache_training/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421 (deflated 16%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/1f1d9aaca301414e7f6c9396df506798ff4eb9a6 (deflated 67%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/10c66461e4c109db5a2196bff4bb59be30396ed8 (deflated 50%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/4b988bccc9dc5adacd403c00b4704976196548f8 (deflated 59%)\n",
" adding: content/gtp_cache_training/models--gpt2/refs/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/refs/main (deflated 3%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer_config.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/added_tokens.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/special_tokens_map.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json (deflated 50%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json (deflated 59%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt (deflated 53%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json (deflated 67%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin (deflated 16%)\n",
" adding: content/gtp_cache_training/gtp_cache_training_json_default-01aa9d8252a24a0d_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/t5_cache_training/ (stored 0%)\n",
" adding: content/t5_cache_training/t5_cache_training_json_default-a82ca4164dba097e_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/4e28ff6ebdf584f5372d9de68867399142435d9a (deflated 48%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/b114c318caf72f6e89ea92e0755c41327a453198 (deflated 82%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/07b81619b82546ab7f30e06c9615c7fca8fe3abd (deflated 44%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/881bdbffc06e471924ecea57f962bc5f8e2a9f21 (deflated 83%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/7c9a3e998a8c74b52484f3a1ccfdcc9767972ee6b34ae7a527cdf6f972a34163 (deflated 53%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/refs/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/refs/main (deflated 5%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer.json (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/added_tokens.json (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json (deflated 44%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json (deflated 82%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model (deflated 48%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json (deflated 83%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin (deflated 53%)\n",
" adding: content/t5_cache_training/json/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-988bff0993eee389.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 58%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-fa17416eabe18767.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-c6cebbf9290f7df0.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/out/ (stored 0%)\n",
" adding: content/out/emotion/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/trainer_state.json (deflated 79%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/all_results.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/predict_results_None.txt (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/trainer_state.json (deflated 77%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/train_results.json (deflated 40%)\n",
" adding: content/out/emotion/gpt2_custom/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/eval_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/scheduler.pt (deflated 50%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/runs/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/1676409101.551365/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/1676409101.551365/events.out.tfevents.1676409101.fc0011e45a00.60473.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/events.out.tfevents.1676409101.fc0011e45a00.60473.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/events.out.tfevents.1676407620.fc0011e45a00.53924.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/1676407620.269752/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/1676407620.269752/events.out.tfevents.1676407620.fc0011e45a00.53924.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/events.out.tfevents.1676411802.fc0011e45a00.72811.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/events.out.tfevents.1676412248.fc0011e45a00.72811.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/1676411802.9557116/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/1676411802.9557116/events.out.tfevents.1676411802.fc0011e45a00.72811.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/events.out.tfevents.1676409199.fc0011e45a00.60936.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/1676409199.1303008/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/1676409199.1303008/events.out.tfevents.1676409199.fc0011e45a00.60936.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/1676408364.7675455/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/1676408364.7675455/events.out.tfevents.1676408364.fc0011e45a00.57251.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/events.out.tfevents.1676408364.fc0011e45a00.57251.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/events.out.tfevents.1676409294.fc0011e45a00.61381.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/1676409294.483754/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/1676409294.483754/events.out.tfevents.1676409294.fc0011e45a00.61381.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/events.out.tfevents.1676407574.fc0011e45a00.53675.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/1676407574.5370467/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/1676407574.5370467/events.out.tfevents.1676407574.fc0011e45a00.53675.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/1676409363.3658211/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/1676409363.3658211/events.out.tfevents.1676409363.fc0011e45a00.61724.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/events.out.tfevents.1676409363.fc0011e45a00.61724.0 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/events.out.tfevents.1676407449.fc0011e45a00.53094.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/1676407449.3215246/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/1676407449.3215246/events.out.tfevents.1676407449.fc0011e45a00.53094.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/events.out.tfevents.1676408949.fc0011e45a00.59782.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/1676408949.6798263/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/1676408949.6798263/events.out.tfevents.1676408949.fc0011e45a00.59782.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/events.out.tfevents.1676410915.fc0011e45a00.68705.0 (deflated 57%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/1676410915.0364006/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/1676410915.0364006/events.out.tfevents.1676410915.fc0011e45a00.68705.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/events.out.tfevents.1676407741.fc0011e45a00.54546.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/1676407741.3566854/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/1676407741.3566854/events.out.tfevents.1676407741.fc0011e45a00.54546.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/events.out.tfevents.1676407672.fc0011e45a00.54203.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/1676407672.9366086/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/1676407672.9366086/events.out.tfevents.1676407672.fc0011e45a00.54203.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/events.out.tfevents.1676408205.fc0011e45a00.56536.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/1676408205.8404686/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/1676408205.8404686/events.out.tfevents.1676408205.fc0011e45a00.56536.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/1676408153.0722597/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/1676408153.0722597/events.out.tfevents.1676408153.fc0011e45a00.56263.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/events.out.tfevents.1676408153.fc0011e45a00.56263.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/trainer_state.json (deflated 75%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/README.md (deflated 54%)\n",
" adding: content/out/emotion/gpt2_custom/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/trainer_state.json (deflated 67%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/optimizer.pt (deflated 31%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/all_results.json (deflated 55%)\n",
" adding: content/out/emotion/gpt2/predict_results_None.txt (deflated 62%)\n",
" adding: content/out/emotion/gpt2/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/trainer_state.json (deflated 81%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/trainer_state.json (deflated 78%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/train_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/eval_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/scheduler.pt (deflated 50%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/trainer_state.json (deflated 81%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/runs/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/events.out.tfevents.1676411778.fc0011e45a00.70872.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/1676411348.7268953/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/1676411348.7268953/events.out.tfevents.1676411348.fc0011e45a00.70872.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/events.out.tfevents.1676411348.fc0011e45a00.70872.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/events.out.tfevents.1676407272.fc0011e45a00.50524.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/events.out.tfevents.1676406850.fc0011e45a00.50524.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/1676406850.2390406/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/1676406850.2390406/events.out.tfevents.1676406850.fc0011e45a00.50524.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/events.out.tfevents.1676403875.fc0011e45a00.37469.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/1676403875.9091897/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/1676403875.9091897/events.out.tfevents.1676403875.fc0011e45a00.37469.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/trainer_state.json (deflated 75%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/README.md (deflated 54%)\n",
" adding: content/out/emotion/gpt2/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/trainer_state.json (deflated 67%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/pytorch_model.bin\n",
"\n",
"\n",
"zip error: Interrupted (aborting)\n"
]
}
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
},
"colab": {
"provenance": []
},
"accelerator": "GPU",
"gpuClass": "premium",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"18f03144f5194bd2a88064eaae1140f0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_36b8333766d44ee2aaa8da8ee75975d2",
"IPY_MODEL_f7a9b125cf1346468e428abd689ff800",
"IPY_MODEL_9b9e6161874f41c98d5c5e55d8d4fc86"
],
"layout": "IPY_MODEL_9925a6f17ba14eee96332f0ea1dc88e5"
}
},
"36b8333766d44ee2aaa8da8ee75975d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_dce64adfb8334591a8ce182918ecb4e3",
"placeholder": "",
"style": "IPY_MODEL_9efd8cd2208245aca3f369f0735e2ee1",
"value": "Downloading (…)okenizer_config.json: 100%"
}
},
"f7a9b125cf1346468e428abd689ff800": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3d05704ffb0040c8b5bfb5c068c3329b",
"max": 2539,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_9564dcdd10c64072bb09e70def311ff3",
"value": 2539
}
},
"9b9e6161874f41c98d5c5e55d8d4fc86": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f406c9b52a274068bd636554558497b2",
"placeholder": "",
"style": "IPY_MODEL_d97be50f8cc64f8680a6cce112863255",
"value": " 2.54k/2.54k [00:00&lt;00:00, 125kB/s]"
}
},
"9925a6f17ba14eee96332f0ea1dc88e5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"dce64adfb8334591a8ce182918ecb4e3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9efd8cd2208245aca3f369f0735e2ee1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3d05704ffb0040c8b5bfb5c068c3329b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9564dcdd10c64072bb09e70def311ff3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"f406c9b52a274068bd636554558497b2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d97be50f8cc64f8680a6cce112863255": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7d6b88e56dad4dcbb0f1b1720f1ff118": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_eabc78cbdeef40feb36cf90fdbcdfbc7",
"IPY_MODEL_6477d99dffbc4cf39e2c6998f71e37f7",
"IPY_MODEL_d63511a8852942309cabe53720939fcc"
],
"layout": "IPY_MODEL_3096b59f64eb48659a8eedea5a171be4"
}
},
"eabc78cbdeef40feb36cf90fdbcdfbc7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_acc58b06f3b54801b10ee872fab39e6e",
"placeholder": "",
"style": "IPY_MODEL_c2bd9c9ddab848529e52adfdc7634044",
"value": "Downloading (…)&quot;spiece.model&quot;;: 100%"
}
},
"6477d99dffbc4cf39e2c6998f71e37f7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8d7e8c29d7e247f1b55d329d40508526",
"max": 791656,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_457b70adcab0464c9f990b13f433c635",
"value": 791656
}
},
"d63511a8852942309cabe53720939fcc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0858fe327ec549b488f6169de1d84654",
"placeholder": "",
"style": "IPY_MODEL_e18a505153c7491f8900142fb1189cd7",
"value": " 792k/792k [00:00&lt;00:00, 8.08MB/s]"
}
},
"3096b59f64eb48659a8eedea5a171be4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"acc58b06f3b54801b10ee872fab39e6e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c2bd9c9ddab848529e52adfdc7634044": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"8d7e8c29d7e247f1b55d329d40508526": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"457b70adcab0464c9f990b13f433c635": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"0858fe327ec549b488f6169de1d84654": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e18a505153c7491f8900142fb1189cd7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"945026e5e11448b39ab37fb2a0bd963c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_8c3aa97d58cb4f21b59af6253c952859",
"IPY_MODEL_848ff807a83c4a79a1b3d7d80c29499c",
"IPY_MODEL_a7b1f6722fcd4e90811041b24df0fe7b"
],
"layout": "IPY_MODEL_f815d05091814c39a467cd8f528db504"
}
},
"8c3aa97d58cb4f21b59af6253c952859": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_915449ab41d848d39d801b4feb932a4f",
"placeholder": "",
"style": "IPY_MODEL_2937b015455647abb7a524f858a881d2",
"value": "Downloading (…)/main/tokenizer.json: 100%"
}
},
"848ff807a83c4a79a1b3d7d80c29499c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c2b6cda9a8e94f7e97d7fb032b8e2bc5",
"max": 2424064,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_af885a022ad743098e5037e1c8dc760a",
"value": 2424064
}
},
"a7b1f6722fcd4e90811041b24df0fe7b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_088ec36aff7f415abfc4fd926fa0f902",
"placeholder": "",
"style": "IPY_MODEL_b1b99d863dc64208afc11416d4936c2c",
"value": " 2.42M/2.42M [00:00&lt;00:00, 18.4MB/s]"
}
},
"f815d05091814c39a467cd8f528db504": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"915449ab41d848d39d801b4feb932a4f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2937b015455647abb7a524f858a881d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"c2b6cda9a8e94f7e97d7fb032b8e2bc5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"af885a022ad743098e5037e1c8dc760a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"088ec36aff7f415abfc4fd926fa0f902": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b1b99d863dc64208afc11416d4936c2c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cb9e02be7ec44f6bb6b8771691c114e4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_f68a247bddf9484e9f7b1666802f4612",
"IPY_MODEL_d8d89ac972084304bff515a16e009452",
"IPY_MODEL_3495b00846ae49acbb0cf3e15edf361e"
],
"layout": "IPY_MODEL_60f6f23e78ce4ee2abf7389ab936c3ac"
}
},
"f68a247bddf9484e9f7b1666802f4612": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9d428e02c4134510baf179ce9137d90c",
"placeholder": "",
"style": "IPY_MODEL_5298f4cd4e2e404ea66d70c62bcfe439",
"value": "Downloading (…)cial_tokens_map.json: 100%"
}
},
"d8d89ac972084304bff515a16e009452": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cd9fdc3eb94a4d00b5af6115318dcf45",
"max": 2201,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d664c674a977456cad109347c0206d0e",
"value": 2201
}
},
"3495b00846ae49acbb0cf3e15edf361e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_17e5dedc0aeb4a1da32113e51158fd74",
"placeholder": "",
"style": "IPY_MODEL_9b70ec9f110f4080a6a26fd12044fe94",
"value": " 2.20k/2.20k [00:00&lt;00:00, 160kB/s]"
}
},
"60f6f23e78ce4ee2abf7389ab936c3ac": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9d428e02c4134510baf179ce9137d90c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5298f4cd4e2e404ea66d70c62bcfe439": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cd9fdc3eb94a4d00b5af6115318dcf45": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d664c674a977456cad109347c0206d0e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"17e5dedc0aeb4a1da32113e51158fd74": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9b70ec9f110f4080a6a26fd12044fe94": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}