UG-final/all_models.ipynb

{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "svk2qSrl7ICc"
},
"source": [
"# **Uczenie Głębokie - projekt**\n",
"W projekcie wykorzystano dataset [emotion](https://huggingface.co/datasets/emotion), zawierający wpisy nacechowane określonymi emocjami.\n",
"\n",
"<br>\n",
"\n",
"Labels:\n",
"- 0 - sadness\n",
"- 1 - joy\n",
"- 2 - love\n",
"- 3 - anger\n",
"- 4 - fear\n",
"- 5 - surprise"
]
},
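{
"cell_type": "markdown",
"metadata": {},
"source": [
"*Illustrative sketch (not part of the original pipeline): load the `emotion` dataset with `datasets.load_dataset` and print the label names, which correspond to the id/label mapping above.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch only: inspect the emotion dataset and its label mapping.\n",
"from datasets import load_dataset\n",
"\n",
"ds = load_dataset('emotion')  # splits: train / validation / test\n",
"label_names = ds['train'].features['label'].names\n",
"print(ds)\n",
"print(dict(enumerate(label_names)))  # e.g. {0: 'sadness', 1: 'joy', ...}"
]
},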
{
"cell_type": "markdown",
"metadata": {
"id": "wJ30OIAM7ICf"
},
"source": [
"### **REQUIREMENTS**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XkE5ENXV7ICf",
"outputId": "68ec24ee-8dcd-48b7-c0ce-3d18c1b9bcd6"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.8/dist-packages (4.23.1)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.2.1)\n",
"Requirement already satisfied: accelerate in /usr/local/lib/python3.8/dist-packages (0.16.0)\n",
"Requirement already satisfied: evaluate in /usr/local/lib/python3.8/dist-packages (0.4.0)\n",
"Requirement already satisfied: datasets in /usr/local/lib/python3.8/dist-packages (2.9.0)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (1.13.1)\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.8/dist-packages (0.1.97)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.8/dist-packages (0.14.1+cu116)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers) (4.64.1)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (1.21.6)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.12.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.13.2)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers) (2.25.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (23.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.8/dist-packages (from accelerate) (5.4.8)\n",
"Requirement already satisfied: dill in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.3.6)\n",
"Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.18.0)\n",
"Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.8/dist-packages (from evaluate) (2023.1.0)\n",
"Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from evaluate) (3.2.0)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from evaluate) (1.3.5)\n",
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.70.14)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
"Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /usr/local/lib/python3.8/dist-packages (from torch) (11.10.3.66)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /usr/local/lib/python3.8/dist-packages (from torch) (11.7.99)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch) (4.4.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /usr/local/lib/python3.8/dist-packages (from torch) (11.7.99)\n",
"Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /usr/local/lib/python3.8/dist-packages (from torch) (8.5.0.96)\n",
"Requirement already satisfied: wheel in /usr/local/lib/python3.8/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (0.38.4)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (57.4.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.8/dist-packages (from torchvision) (7.1.2)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2022.12.7)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (1.26.14)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (4.0.0)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2022.7.1)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->evaluate) (1.15.0)\n"
]
}
],
"source": [
"!pip3 install transformers scikit-learn accelerate evaluate datasets torch sentencepiece torchvision"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "MrV5G1gW7ICg"
},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"from pathlib import Path\n",
"from typing import Dict, List\n",
"from datasets import load_dataset\n",
"import torch\n",
"import pandas as pd\n",
"\n",
"os.environ['TOKENIZERS_PARALLELISM'] = 'true'"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Y107u4JG7ICh"
},
"source": [
"### **DATA PREP**"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PmgAAQFV7ICh",
"outputId": "e6f4f065-4d0d-4102-d96a-c5ca791dd113"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"No config specified, defaulting to: emotion/split\n",
"Found cached dataset emotion (/root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)\n",
"\r 0% 0/3 [00:00<?, ?it/s]\r100% 3/3 [00:00<00:00, 182.77it/s]\n",
"Saving into: data/train.json\n",
"Saving into: data/s2s-train.json\n",
"Saving into: data/valid.json\n",
"Saving into: data/s2s-valid.json\n",
"Saving into: data/test.json\n",
"Saving into: data/s2s-test.json\n"
]
}
],
"source": [
"!mkdir -p data\n",
"!python data_prep.py"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Fv0h2-MW7ICh",
"outputId": "ab7744f0-38e1-4415-f9e0-dbb182583e83"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\"label\": 0, \"text\": \"i didnt feel humiliated\"}\n",
"{\"label\": 0, \"text\": \"i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake\"}\n",
"{\"label\": 3, \"text\": \"im grabbing a minute to post i feel greedy wrong\"}\n",
"{\"label\": 2, \"text\": \"i am ever feeling nostalgic about the fireplace i will know that it is still on the property\"}\n",
"{\"label\": 3, \"text\": \"i am feeling grouchy\"}\n",
"{\"label\": 0, \"text\": \"ive been feeling a little burdened lately wasnt sure why that was\"}\n",
"{\"label\": 5, \"text\": \"ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny\"}\n",
"{\"label\": 4, \"text\": \"i feel as confused about life as a teenager or as jaded as a year old man\"}\n",
"{\"label\": 1, \"text\": \"i have been with petronas for years i feel that petronas has performed well and made a huge profit\"}\n",
"{\"label\": 2, \"text\": \"i feel romantic too\"}\n"
]
}
],
"source": [
"!head data/train.json"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6XHKYEod7ICi",
"outputId": "75b8480e-159a-4968-b0cc-3605680f7410"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\"label\": \"sadness\", \"text\": \"i didnt feel humiliated\"}\n",
"{\"label\": \"sadness\", \"text\": \"i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake\"}\n",
"{\"label\": \"anger\", \"text\": \"im grabbing a minute to post i feel greedy wrong\"}\n",
"{\"label\": \"love\", \"text\": \"i am ever feeling nostalgic about the fireplace i will know that it is still on the property\"}\n",
"{\"label\": \"anger\", \"text\": \"i am feeling grouchy\"}\n",
"{\"label\": \"sadness\", \"text\": \"ive been feeling a little burdened lately wasnt sure why that was\"}\n",
"{\"label\": \"surprise\", \"text\": \"ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny\"}\n",
"{\"label\": \"fear\", \"text\": \"i feel as confused about life as a teenager or as jaded as a year old man\"}\n",
"{\"label\": \"joy\", \"text\": \"i have been with petronas for years i feel that petronas has performed well and made a huge profit\"}\n",
"{\"label\": \"love\", \"text\": \"i feel romantic too\"}\n"
]
}
],
"source": [
"!head data/s2s-train.json"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BtTwN0yz7ICj",
"outputId": "5818cd0b-56eb-4a0f-cada-cf89d03f8d9c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" 2000 data/s2s-test.json\n",
" 16000 data/s2s-train.json\n",
" 2000 data/s2s-valid.json\n",
" 2000 data/test.json\n",
" 16000 data/train.json\n",
" 2000 data/valid.json\n",
" 40000 total\n"
]
}
],
"source": [
"!wc -l data/*"
]
},
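{
"cell_type": "markdown",
"metadata": {},
"source": [
"*The contents of `data_prep.py` are not shown in this notebook. The cell below is a hedged sketch of what it presumably does, based on the files and formats seen above: write each split as JSON Lines, once with integer labels (`data/*.json`) and once with label names for the seq2seq variant (`data/s2s-*.json`).*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch of data_prep.py (assumption based on the outputs above, not the actual script).\n",
"import json\n",
"from pathlib import Path\n",
"from datasets import load_dataset\n",
"\n",
"Path('data').mkdir(exist_ok=True)\n",
"ds = load_dataset('emotion')\n",
"label_names = ds['train'].features['label'].names\n",
"\n",
"for hf_split, name in [('train', 'train'), ('validation', 'valid'), ('test', 'test')]:\n",
"    with open(f'data/{name}.json', 'w') as plain, open(f'data/s2s-{name}.json', 'w') as s2s:\n",
"        for ex in ds[hf_split]:\n",
"            plain.write(json.dumps({'label': ex['label'], 'text': ex['text']}) + '\\n')\n",
"            s2s.write(json.dumps({'label': label_names[ex['label']], 'text': ex['text']}) + '\\n')"
]
},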
{
"cell_type": "markdown",
"metadata": {
"id": "jaX7Iyck7ICk"
},
"source": [
"## **ROBERTA**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bPBy_20B7ICk"
},
"source": [
"- full data\n",
"- model `roberta-base`\n",
"- sequnece length: 128\n",
"- training epoch: 1"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "C5TetFI_7ICk",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b83e8452-3eb2-4230-f19e-55fe8a830f4e"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:44:57.299984: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:44:57.452345: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:44:58.236913: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:44:58.237017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:44:58.237058: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=None,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/roberta/runs/Feb14_21-45-00_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
"metric_for_best_model=None,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/roberta,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/roberta,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 11491.24it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 1882.54it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 573.49it/s]\n",
"Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 83.8kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:01,575 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:01,576 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:45:01,670 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:01,762 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:01,763 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 9.36MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.95MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.7MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,975 >> loading file vocab.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file merges.txt from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file tokenizer.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:02,976 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:02,977 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"INFO:__main__:Using implementation from class: AutoModelForSequenceClassification\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 501M/501M [00:04<00:00, 105MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:45:08,072 >> loading weights file pytorch_model.bin from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n",
"[WARNING|modeling_utils.py:2596] 2023-02-14 21:45:09,415 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight']\n",
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:45:09,415 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('roberta.encoder.layer.0.attention.self.query.weight', False), ('roberta.encoder.layer.0.attention.self.query.bias', False), ('roberta.encoder.layer.0.attention.self.key.weight', False), ('roberta.encoder.layer.0.attention.self.key.bias', False), ('roberta.encoder.layer.0.attention.self.value.weight', False), ('roberta.encoder.layer.0.attention.self.value.bias', False), ('roberta.encoder.layer.0.attention.output.dense.weight', False), ('roberta.encoder.layer.0.attention.output.dense.bias', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.0.intermediate.dense.weight', False), ('roberta.encoder.layer.0.intermediate.dense.bias', False), ('roberta.encoder.layer.0.output.dense.weight', False), ('roberta.encoder.layer.0.output.dense.bias', False), ('roberta.encoder.layer.0.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.attention.self.query.weight', False), ('roberta.encoder.layer.2.attention.self.query.bias', False), ('roberta.encoder.layer.2.attention.self.key.weight', False), ('roberta.encoder.layer.2.attention.self.key.bias', False), ('roberta.encoder.layer.2.attention.self.value.weight', False), ('roberta.encoder.layer.2.attention.self.value.bias', False), ('roberta.encoder.layer.2.attention.output.dense.weight', False), ('roberta.encoder.layer.2.attention.output.dense.bias', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.intermediate.dense.weight', False), ('roberta.encoder.layer.2.intermediate.dense.bias', False), ('roberta.encoder.layer.2.output.dense.weight', False), ('roberta.encoder.layer.2.output.dense.bias', False), ('roberta.encoder.layer.2.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.attention.self.query.weight', False), ('roberta.encoder.layer.4.attention.self.query.bias', False), ('roberta.encoder.layer.4.attention.self.key.weight', False), ('roberta.encoder.layer.4.attention.self.key.bias', False), ('roberta.encoder.layer.4.attention.self.value.weight', False), ('roberta.encoder.layer.4.attention.self.value.bias', False), ('roberta.encoder.layer.4.attention.output.dense.weight', False), ('roberta.encoder.layer.4.attention.output.dense.bias', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.intermediate.dense.weight', False), ('roberta.encoder.layer.4.intermediate.dense.bias', False), ('roberta.encoder.layer.4.output.dense.weight', False), ('roberta.encoder.layer.4.output.dense.bias', False), ('roberta.encoder.layer.4.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.attention.self.query.weight', False), ('roberta.encoder.layer.6.attention.self.query.bias', False), ('roberta.encoder.layer.6.attention.self.key.weight', False), ('roberta.encoder.layer.6.attention.self.key.bias', False), ('roberta.encoder.layer.6.attention.self.value.weight', False), ('roberta.encoder.layer.6.attention.self.value.bias', False), ('roberta.encoder.layer.6.attention.output.dense.weight', False), ('roberta.encoder.layer.6.attention.output.dense.bias', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.weight', False), 
('roberta.encoder.layer.6.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.intermediate.dense.weight', False), ('roberta.encoder.layer.6.intermediate.dense.bias', False), ('roberta.encoder.layer.6.output.dense.weight', False), ('roberta.encoder.layer.6.output.dense.bias', False), ('roberta.encoder.layer.6.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.attention.self.query.weight', False), ('roberta.encoder.layer.8.attention.self.query.bias', False), ('roberta.encoder.layer.8.attention.self.key.weight', False), ('roberta.encoder.layer.8.attention.self.key.bias', False), ('roberta.encoder.layer.8.attention.self.value.weight', False), ('roberta.encoder.layer.8.attention.self.value.bias', False), ('roberta.encoder.layer.8.attention.output.dense.weight', False), ('roberta.encoder.layer.8.attention.output.dense.bias', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.intermediate.dense.weight', False), ('roberta.encoder.layer.8.intermediate.dense.bias', False), ('roberta.encoder.layer.8.output.dense.weight', False), ('roberta.encoder.layer.8.output.dense.bias', False), ('roberta.encoder.layer.8.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.attention.self.query.weight', False), ('roberta.encoder.layer.10.attention.self.query.bias', False), ('roberta.encoder.layer.10.attention.self.key.weight', False), ('roberta.encoder.layer.10.attention.self.key.bias', False), ('roberta.encoder.layer.10.attention.self.value.weight', False), ('roberta.encoder.layer.10.attention.self.value.bias', False), ('roberta.encoder.layer.10.attention.output.dense.weight', False), ('roberta.encoder.layer.10.attention.output.dense.bias', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.intermediate.dense.weight', False), ('roberta.encoder.layer.10.intermediate.dense.bias', False), ('roberta.encoder.layer.10.output.dense.weight', False), ('roberta.encoder.layer.10.output.dense.bias', False), ('roberta.encoder.layer.10.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.output.LayerNorm.bias', False)] \n",
"\n",
"\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 20.66ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.58ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.81ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [0, 118, 109, 465, 92, 964, 939, 475, 164, 7, 860, 1823, 543, 7, 146, 106, 1095, 8, 114, 939, 2845, 14, 939, 218, 326, 236, 7, 619, 2581, 456, 8, 95, 3068, 66, 5, 94, 76, 9, 334, 15, 127, 308, 939, 475, 164, 7, 33, 7, 860, 1823, 543, 45, 7, 575, 99, 82, 206, 9, 162, 145, 10, 784, 9604, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [0, 118, 553, 106, 7, 1962, 162, 11, 2351, 10, 232, 147, 70, 76, 793, 1972, 115, 1733, 62, 2157, 7917, 8, 2247, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [0, 118, 619, 77, 47, 32, 10, 10837, 621, 47, 5696, 97, 10837, 82, 88, 110, 301, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:725] 2023-02-14 21:45:13,102 >> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:45:13,109 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:45:13,109 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:45:13,109 >> Num Epochs = 1\n",
"[INFO|trainer.py:1610] 2023-02-14 21:45:13,109 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:45:13,109 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:45:13,109 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:45:13,109 >> Total optimization steps = 667\n",
"{'loss': 0.8083, 'learning_rate': 5.0074962518740634e-06, 'epoch': 0.75}\n",
" 75% 500/667 [00:58<00:19, 8.76it/s][INFO|trainer.py:2656] 2023-02-14 21:46:11,148 >> Saving model checkpoint to out/emotion/roberta/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:46:11,149 >> Configuration saved in out/emotion/roberta/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:46:12,047 >> Model weights saved in out/emotion/roberta/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:46:12,048 >> tokenizer config file saved in out/emotion/roberta/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:46:12,048 >> Special tokens file saved in out/emotion/roberta/checkpoint-500/special_tokens_map.json\n",
"100% 666/667 [01:19<00:00, 8.78it/s][INFO|trainer.py:1852] 2023-02-14 21:46:32,443 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'train_runtime': 79.3341, 'train_samples_per_second': 201.679, 'train_steps_per_second': 8.407, 'train_loss': 0.7161429089227359, 'epoch': 1.0}\n",
"100% 667/667 [01:19<00:00, 8.41it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:46:32,445 >> Saving model checkpoint to out/emotion/roberta\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:46:32,446 >> Configuration saved in out/emotion/roberta/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:46:33,422 >> Model weights saved in out/emotion/roberta/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:46:33,422 >> tokenizer config file saved in out/emotion/roberta/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:46:33,423 >> Special tokens file saved in out/emotion/roberta/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 1.0\n",
" train_loss = 0.7161\n",
" train_runtime = 0:01:19.33\n",
" train_samples = 16000\n",
" train_samples_per_second = 201.679\n",
" train_steps_per_second = 8.407\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:46:33,524 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:46:33,526 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:46:33,526 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:46:33,526 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.66it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.0\n",
" eval_accuracy = 0.889\n",
" eval_loss = 0.3302\n",
" eval_runtime = 0:00:03.59\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 556.411\n",
" eval_steps_per_second = 23.369\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:46:37,124 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:46:37,125 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:46:37,125 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:46:37,125 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.68it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:46:40,840 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.8889999985694885}]}\n"
]
}
],
"source": [
"!python run_glue.py \\\n",
" --cache_dir roberta_training_cache \\\n",
" --model_name_or_path roberta-base \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/roberta \\\n",
" --overwrite_output_dir"
]
},
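{
"cell_type": "markdown",
"metadata": {},
"source": [
"*The `Frozen layers` listing in the log above shows that every other encoder layer (0, 2, 4, 6, 8, 10) has `requires_grad=False`. The freezing itself is done inside `run_glue.py` (not shown here); the cell below is a minimal sketch of how such freezing can be implemented.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Minimal sketch (assumption): freeze every other RoBERTa encoder layer, as in the log above.\n",
"from transformers import AutoModelForSequenceClassification\n",
"\n",
"model = AutoModelForSequenceClassification.from_pretrained('roberta-base', num_labels=6)\n",
"for name, param in model.named_parameters():\n",
"    if name.startswith('roberta.encoder.layer.'):\n",
"        layer_idx = int(name.split('.')[3])  # 'roberta.encoder.layer.<idx>....'\n",
"        if layer_idx % 2 == 0:  # layers 0, 2, 4, 6, 8, 10\n",
"            param.requires_grad = False\n",
"\n",
"frozen = [n for n, p in model.named_parameters() if not p.requires_grad]\n",
"print(len(frozen), 'frozen parameter tensors')"
]
},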
{
"cell_type": "markdown",
"source": [
"- full data\n",
"- sequence length: 128\n",
"- leakyRelu instad of relu\n",
"- every other layer frozen\n",
"- custom head"
],
"metadata": {
"id": "b1iFFLFAf9PC"
}
},
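{
"cell_type": "markdown",
"metadata": {},
"source": [
"*`roberta_custom` is implemented in the project's own code, which is not shown in this notebook. Based on the newly initialized weight names reported in the log below (`classifier.dense_1_input`, `classifier.dense_1_hidden`, `classifier.dense_2`, `classifier.out_proj`), the custom head looks roughly like the sketch in the next cell; the exact wiring, hidden sizes and dropout are assumptions.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch.nn as nn\n",
"\n",
"# Hedged sketch (not the project's actual roberta_custom class): a RoBERTa-style\n",
"# classification head that uses LeakyReLU as its activation. Layer names mirror the\n",
"# log below; their wiring, sizes and dropout rate are assumptions.\n",
"class RobertaCustomHeadSketch(nn.Module):\n",
"    def __init__(self, hidden_size=768, num_labels=6, dropout=0.1):\n",
"        super().__init__()\n",
"        self.dense_1_input = nn.Linear(hidden_size, hidden_size)\n",
"        self.dense_1_hidden = nn.Linear(hidden_size, hidden_size)\n",
"        self.dense_2 = nn.Linear(hidden_size, hidden_size)\n",
"        self.out_proj = nn.Linear(hidden_size, num_labels)\n",
"        self.activation = nn.LeakyReLU()\n",
"        self.dropout = nn.Dropout(dropout)\n",
"\n",
"    def forward(self, features):\n",
"        x = features[:, 0, :]  # embedding of the <s> token (RoBERTa's [CLS] equivalent)\n",
"        x = self.activation(self.dense_1_input(self.dropout(x)))\n",
"        x = self.activation(self.dense_1_hidden(self.dropout(x)))\n",
"        x = self.activation(self.dense_2(self.dropout(x)))\n",
"        return self.out_proj(self.dropout(x))"
]
},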
{
"cell_type": "code",
"source": [
"!python run_glue.py \\\n",
" --cache_dir roberta_custom_training_cache \\\n",
" --model_name_or_path roberta-base \\\n",
" --custom_model roberta_custom \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/roberta_custom \\\n",
" --overwrite_output_dir"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WzRBwNKqkDAk",
"outputId": "8d042117-3af6-4041-d1a5-d70024df24fb"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:47:02.722049: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:47:02.876002: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:47:03.659342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:47:03.659451: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:47:03.659470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=None,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/roberta_custom/runs/Feb14_21-47-05_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
"metric_for_best_model=None,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/roberta_custom,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/roberta_custom,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 14463.12it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2119.76it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 657.14it/s]\n",
"Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 88.4kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:06,896 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:06,897 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:47:06,989 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:07,079 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:07,080 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 9.35MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.91MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 10.3MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file vocab.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file merges.txt from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file tokenizer.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:08,306 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:08,306 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: False\n",
"INFO:__main__:Using implementation from class: RobertaForSequenceClassificationCustomAlternative\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 501M/501M [00:04<00:00, 106MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:47:13,300 >> loading weights file pytorch_model.bin from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n",
"[WARNING|modeling_utils.py:2596] 2023-02-14 21:47:15,772 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassificationCustomAlternative: ['roberta.pooler.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']\n",
"- This IS expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:47:15,772 >> Some weights of RobertaForSequenceClassificationCustomAlternative were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense_1_input.weight', 'classifier.dense_2.weight', 'classifier.out_proj.bias', 'classifier.dense_2.bias', 'classifier.dense_1_input.bias', 'classifier.dense_1_hidden.weight', 'classifier.dense_1_hidden.bias', 'classifier.out_proj.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('roberta.encoder.layer.0.attention.self.query.weight', False), ('roberta.encoder.layer.0.attention.self.query.bias', False), ('roberta.encoder.layer.0.attention.self.key.weight', False), ('roberta.encoder.layer.0.attention.self.key.bias', False), ('roberta.encoder.layer.0.attention.self.value.weight', False), ('roberta.encoder.layer.0.attention.self.value.bias', False), ('roberta.encoder.layer.0.attention.output.dense.weight', False), ('roberta.encoder.layer.0.attention.output.dense.bias', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.0.intermediate.dense.weight', False), ('roberta.encoder.layer.0.intermediate.dense.bias', False), ('roberta.encoder.layer.0.output.dense.weight', False), ('roberta.encoder.layer.0.output.dense.bias', False), ('roberta.encoder.layer.0.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.attention.self.query.weight', False), ('roberta.encoder.layer.2.attention.self.query.bias', False), ('roberta.encoder.layer.2.attention.self.key.weight', False), ('roberta.encoder.layer.2.attention.self.key.bias', False), ('roberta.encoder.layer.2.attention.self.value.weight', False), ('roberta.encoder.layer.2.attention.self.value.bias', False), ('roberta.encoder.layer.2.attention.output.dense.weight', False), ('roberta.encoder.layer.2.attention.output.dense.bias', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.intermediate.dense.weight', False), ('roberta.encoder.layer.2.intermediate.dense.bias', False), ('roberta.encoder.layer.2.output.dense.weight', False), ('roberta.encoder.layer.2.output.dense.bias', False), ('roberta.encoder.layer.2.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.attention.self.query.weight', False), ('roberta.encoder.layer.4.attention.self.query.bias', False), ('roberta.encoder.layer.4.attention.self.key.weight', False), ('roberta.encoder.layer.4.attention.self.key.bias', False), ('roberta.encoder.layer.4.attention.self.value.weight', False), ('roberta.encoder.layer.4.attention.self.value.bias', False), ('roberta.encoder.layer.4.attention.output.dense.weight', False), ('roberta.encoder.layer.4.attention.output.dense.bias', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.intermediate.dense.weight', False), ('roberta.encoder.layer.4.intermediate.dense.bias', False), ('roberta.encoder.layer.4.output.dense.weight', False), ('roberta.encoder.layer.4.output.dense.bias', False), ('roberta.encoder.layer.4.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.attention.self.query.weight', False), ('roberta.encoder.layer.6.attention.self.query.bias', False), ('roberta.encoder.layer.6.attention.self.key.weight', False), ('roberta.encoder.layer.6.attention.self.key.bias', False), ('roberta.encoder.layer.6.attention.self.value.weight', False), ('roberta.encoder.layer.6.attention.self.value.bias', False), ('roberta.encoder.layer.6.attention.output.dense.weight', False), ('roberta.encoder.layer.6.attention.output.dense.bias', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.weight', False), 
('roberta.encoder.layer.6.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.intermediate.dense.weight', False), ('roberta.encoder.layer.6.intermediate.dense.bias', False), ('roberta.encoder.layer.6.output.dense.weight', False), ('roberta.encoder.layer.6.output.dense.bias', False), ('roberta.encoder.layer.6.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.attention.self.query.weight', False), ('roberta.encoder.layer.8.attention.self.query.bias', False), ('roberta.encoder.layer.8.attention.self.key.weight', False), ('roberta.encoder.layer.8.attention.self.key.bias', False), ('roberta.encoder.layer.8.attention.self.value.weight', False), ('roberta.encoder.layer.8.attention.self.value.bias', False), ('roberta.encoder.layer.8.attention.output.dense.weight', False), ('roberta.encoder.layer.8.attention.output.dense.bias', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.intermediate.dense.weight', False), ('roberta.encoder.layer.8.intermediate.dense.bias', False), ('roberta.encoder.layer.8.output.dense.weight', False), ('roberta.encoder.layer.8.output.dense.bias', False), ('roberta.encoder.layer.8.output.LayerNorm.weight', False), ('roberta.encoder.layer.8.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.attention.self.query.weight', False), ('roberta.encoder.layer.10.attention.self.query.bias', False), ('roberta.encoder.layer.10.attention.self.key.weight', False), ('roberta.encoder.layer.10.attention.self.key.bias', False), ('roberta.encoder.layer.10.attention.self.value.weight', False), ('roberta.encoder.layer.10.attention.self.value.bias', False), ('roberta.encoder.layer.10.attention.output.dense.weight', False), ('roberta.encoder.layer.10.attention.output.dense.bias', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.10.intermediate.dense.weight', False), ('roberta.encoder.layer.10.intermediate.dense.bias', False), ('roberta.encoder.layer.10.output.dense.weight', False), ('roberta.encoder.layer.10.output.dense.bias', False), ('roberta.encoder.layer.10.output.LayerNorm.weight', False), ('roberta.encoder.layer.10.output.LayerNorm.bias', False)] \n",
"\n",
"\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:01<00:00, 15.42ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.47ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 19.76ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [0, 118, 109, 465, 92, 964, 939, 475, 164, 7, 860, 1823, 543, 7, 146, 106, 1095, 8, 114, 939, 2845, 14, 939, 218, 326, 236, 7, 619, 2581, 456, 8, 95, 3068, 66, 5, 94, 76, 9, 334, 15, 127, 308, 939, 475, 164, 7, 33, 7, 860, 1823, 543, 45, 7, 575, 99, 82, 206, 9, 162, 145, 10, 784, 9604, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [0, 118, 553, 106, 7, 1962, 162, 11, 2351, 10, 232, 147, 70, 76, 793, 1972, 115, 1733, 62, 2157, 7917, 8, 2247, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [0, 118, 619, 77, 47, 32, 10, 10837, 621, 47, 5696, 97, 10837, 82, 88, 110, 301, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:725] 2023-02-14 21:47:19,642 >> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:47:19,649 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:47:19,649 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:47:19,649 >> Num Epochs = 1\n",
"[INFO|trainer.py:1610] 2023-02-14 21:47:19,649 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:47:19,649 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:47:19,649 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:47:19,649 >> Total optimization steps = 667\n",
"{'loss': 0.8955, 'learning_rate': 5.0074962518740634e-06, 'epoch': 0.75}\n",
" 75% 500/667 [00:58<00:19, 8.75it/s][INFO|trainer.py:2656] 2023-02-14 21:48:17,996 >> Saving model checkpoint to out/emotion/roberta_custom/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:48:17,997 >> Configuration saved in out/emotion/roberta_custom/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:48:19,015 >> Model weights saved in out/emotion/roberta_custom/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:48:19,016 >> tokenizer config file saved in out/emotion/roberta_custom/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:48:19,016 >> Special tokens file saved in out/emotion/roberta_custom/checkpoint-500/special_tokens_map.json\n",
"100% 666/667 [01:20<00:00, 8.66it/s][INFO|trainer.py:1852] 2023-02-14 21:48:40,745 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'train_runtime': 81.0963, 'train_samples_per_second': 197.296, 'train_steps_per_second': 8.225, 'train_loss': 0.8004468377383573, 'epoch': 1.0}\n",
"100% 667/667 [01:21<00:00, 8.23it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:48:40,747 >> Saving model checkpoint to out/emotion/roberta_custom\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:48:40,748 >> Configuration saved in out/emotion/roberta_custom/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:48:41,796 >> Model weights saved in out/emotion/roberta_custom/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:48:41,797 >> tokenizer config file saved in out/emotion/roberta_custom/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:48:41,797 >> Special tokens file saved in out/emotion/roberta_custom/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 1.0\n",
" train_loss = 0.8004\n",
" train_runtime = 0:01:21.09\n",
" train_samples = 16000\n",
" train_samples_per_second = 197.296\n",
" train_steps_per_second = 8.225\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:48:41,898 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:48:41,899 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:48:41,900 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:48:41,900 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.62it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.0\n",
" eval_accuracy = 0.867\n",
" eval_loss = 0.39\n",
" eval_runtime = 0:00:03.59\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 555.583\n",
" eval_steps_per_second = 23.334\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:48:45,503 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:48:45,504 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:48:45,504 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:48:45,504 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.74it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:48:49,211 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.8669999837875366}]}\n"
]
}
]
},
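{
"cell_type": "markdown",
"metadata": {},
"source": [
"The \"Frozen layers\" listing above shows that every second encoder layer (0, 2, 4, 6, 8, 10) of the custom RoBERTa model had `requires_grad=False` during fine-tuning. The exact implementation lives in the project's training script and its `RobertaForSequenceClassificationCustomAlternative` class; the snippet below is only a minimal, hypothetical sketch of how such selective freezing is typically done, assuming the stock `roberta-base` checkpoint and `AutoModelForSequenceClassification` instead of the custom class.\n",
"```python\n",
"from transformers import AutoModelForSequenceClassification\n",
"\n",
"# Hypothetical sketch (not the project's actual training code):\n",
"# freeze every second RoBERTa encoder layer, as in the log above.\n",
"model = AutoModelForSequenceClassification.from_pretrained(\"roberta-base\", num_labels=6)\n",
"\n",
"frozen_layers = range(0, 12, 2)  # layers 0, 2, 4, 6, 8, 10\n",
"for name, param in model.named_parameters():\n",
"    if any(f\"encoder.layer.{i}.\" in name for i in frozen_layers):\n",
"        param.requires_grad = False\n",
"\n",
"# Inspect what was frozen, mirroring the \"Frozen layers\" printout.\n",
"print([(n, p.requires_grad) for n, p in model.named_parameters() if not p.requires_grad])\n",
"```"
]
},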
{
"cell_type": "markdown",
"metadata": {
"id": "HUdoRk5o7ICl"
},
"source": [
"## **GPT2**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "exFg0yb-7ICl"
},
"source": [
"- full data\n",
"- model `GPT2`\n",
"- sequnece length: 128\n",
"- training epoch: 1"
]
},
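{
"cell_type": "markdown",
"metadata": {},
"source": [
"The output that follows comes from the project's own training setup; the snippet here is only a minimal, hypothetical sketch of an equivalent `GPT2ForSequenceClassification` fine-tune built directly on the `Trainer` API, using the hyperparameters reported in that log (batch size 24, learning rate 2e-05, `max_steps=2500`, `eval_steps=250`, sequence length 128, PAD token reused from EOS). The file paths `data/train.json` and `data/valid.json` mirror those printed in the log; everything else is an assumption for illustration.\n",
"```python\n",
"import numpy as np\n",
"from datasets import load_dataset\n",
"from transformers import (AutoTokenizer, GPT2ForSequenceClassification,\n",
"                          Trainer, TrainingArguments)\n",
"\n",
"raw = load_dataset(\"json\", data_files={\"train\": \"data/train.json\",\n",
"                                       \"validation\": \"data/valid.json\"})\n",
"\n",
"tok = AutoTokenizer.from_pretrained(\"gpt2\")\n",
"tok.pad_token = tok.eos_token  # GPT-2 ships without a PAD token, reuse EOS\n",
"\n",
"def tokenize(batch):\n",
"    return tok(batch[\"text\"], padding=\"max_length\", truncation=True, max_length=128)\n",
"\n",
"ds = raw.map(tokenize, batched=True)\n",
"\n",
"model = GPT2ForSequenceClassification.from_pretrained(\"gpt2\", num_labels=6)\n",
"model.config.pad_token_id = tok.pad_token_id  # classification head needs to find the last non-pad token\n",
"\n",
"def compute_metrics(p):\n",
"    return {\"accuracy\": (np.argmax(p.predictions, axis=1) == p.label_ids).mean()}\n",
"\n",
"args = TrainingArguments(\n",
"    output_dir=\"out/emotion/gpt2\",\n",
"    per_device_train_batch_size=24,\n",
"    per_device_eval_batch_size=24,\n",
"    learning_rate=2e-05,\n",
"    max_steps=2500,\n",
"    evaluation_strategy=\"steps\",\n",
"    eval_steps=250,\n",
"    save_steps=500,\n",
"    load_best_model_at_end=True,\n",
"    metric_for_best_model=\"accuracy\",\n",
")\n",
"\n",
"trainer = Trainer(model=model, args=args, train_dataset=ds[\"train\"],\n",
"                  eval_dataset=ds[\"validation\"], compute_metrics=compute_metrics)\n",
"trainer.train()\n",
"```"
]
},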
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "DMHK35db7ICl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5a3776f5-7feb-480b-a433-a80ed81f3eb7"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:48:52.605236: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:48:52.757779: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:48:53.540701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:48:53.540799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:48:53.540819: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/gpt2,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/gpt2,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 12169.16it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2183.40it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 665.62it/s]\n",
"Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 125kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:57,052 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:57,053 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:48:57,145 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:57,236 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:57,237 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.20MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 6.19MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.7MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file vocab.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file merges.txt from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file tokenizer.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:58,447 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:58,448 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using implementation from class: AutoModelForSequenceClassification\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 108MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:49:03,784 >> loading weights file pytorch_model.bin from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 21:49:05,169 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassification.\n",
"\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:49:05,169 >> Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"[ERROR|tokenization_utils_base.py:1019] 2023-02-14 21:49:05,177 >> Using pad_token, but it is not set yet.\n",
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 20.23ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.04ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.92ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:503] 2023-02-14 21:49:08,712 >> max_steps is given, it will override any value given in num_train_epochs\n",
"[INFO|trainer.py:725] 2023-02-14 21:49:08,712 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:49:08,718 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:49:08,718 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:49:08,718 >> Num Epochs = 4\n",
"[INFO|trainer.py:1610] 2023-02-14 21:49:08,719 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:49:08,719 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:49:08,719 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:49:08,719 >> Total optimization steps = 2500\n",
"{'loss': 2.3442, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.15}\n",
"{'loss': 1.3126, 'learning_rate': 1.8400000000000003e-05, 'epoch': 0.3}\n",
" 10% 250/2500 [00:37<05:31, 6.79it/s][INFO|trainer.py:725] 2023-02-14 21:49:46,426 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:49:46,428 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:49:46,428 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:49:46,428 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 4% 3/84 [00:00<00:02, 29.40it/s]\u001b[A\n",
" 7% 6/84 [00:00<00:03, 23.74it/s]\u001b[A\n",
" 11% 9/84 [00:00<00:03, 22.40it/s]\u001b[A\n",
" 14% 12/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 18% 15/84 [00:00<00:03, 21.50it/s]\u001b[A\n",
" 21% 18/84 [00:00<00:03, 21.30it/s]\u001b[A\n",
" 25% 21/84 [00:00<00:02, 21.20it/s]\u001b[A\n",
" 29% 24/84 [00:01<00:02, 20.97it/s]\u001b[A\n",
" 32% 27/84 [00:01<00:02, 20.93it/s]\u001b[A\n",
" 36% 30/84 [00:01<00:02, 20.97it/s]\u001b[A\n",
" 39% 33/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 43% 36/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 46% 39/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 50% 42/84 [00:01<00:01, 21.03it/s]\u001b[A\n",
" 54% 45/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 57% 48/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 61% 51/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 64% 54/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 68% 57/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 71% 60/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 75% 63/84 [00:02<00:00, 21.00it/s]\u001b[A\n",
" 79% 66/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 82% 69/84 [00:03<00:00, 20.94it/s]\u001b[A\n",
" 86% 72/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 89% 75/84 [00:03<00:00, 20.98it/s]\u001b[A\n",
" 93% 78/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 96% 81/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
"100% 84/84 [00:03<00:00, 22.24it/s]\u001b[A\n",
"{'eval_loss': 0.7983964085578918, 'eval_accuracy': 0.7465000152587891, 'eval_runtime': 3.9877, 'eval_samples_per_second': 501.548, 'eval_steps_per_second': 21.065, 'epoch': 0.37}\n",
"\n",
" 10% 250/2500 [00:41<05:31, 6.79it/s]\n",
"{'loss': 0.7216, 'learning_rate': 1.76e-05, 'epoch': 0.45}\n",
"{'loss': 0.5032, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.6}\n",
"{'loss': 0.3904, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.75}\n",
" 20% 500/2500 [01:18<04:56, 6.74it/s][INFO|trainer.py:725] 2023-02-14 21:50:27,312 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:50:27,314 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:50:27,314 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:50:27,314 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.77it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.71it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.34it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.72it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.09it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.95it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.92it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.87it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.91it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.95it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.91it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.82it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.87it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.90it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
"{'eval_loss': 0.29131895303726196, 'eval_accuracy': 0.9035000205039978, 'eval_runtime': 3.9922, 'eval_samples_per_second': 500.974, 'eval_steps_per_second': 21.041, 'epoch': 0.75}\n",
"\n",
" 20% 500/2500 [01:22<04:56, 6.74it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:50:31,307 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:50:31,308 >> Configuration saved in out/emotion/gpt2/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:50:32,356 >> Model weights saved in out/emotion/gpt2/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:50:32,357 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:50:32,357 >> Special tokens file saved in out/emotion/gpt2/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.3554, 'learning_rate': 1.5200000000000002e-05, 'epoch': 0.9}\n",
"{'loss': 0.2871, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.05}\n",
" 30% 750/2500 [02:02<04:19, 6.74it/s][INFO|trainer.py:725] 2023-02-14 21:51:11,104 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:51:11,106 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:51:11,106 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:51:11,106 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.92it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.90it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.57it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.98it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.63it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.31it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.17it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.02it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.04it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.07it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.07it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.04it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
"{'eval_loss': 0.2168988287448883, 'eval_accuracy': 0.9235000014305115, 'eval_runtime': 3.9688, 'eval_samples_per_second': 503.925, 'eval_steps_per_second': 21.165, 'epoch': 1.12}\n",
"\n",
" 30% 750/2500 [02:06<04:19, 6.74it/s]\n",
"{'loss': 0.2285, 'learning_rate': 1.3600000000000002e-05, 'epoch': 1.2}\n",
"{'loss': 0.1888, 'learning_rate': 1.2800000000000001e-05, 'epoch': 1.35}\n",
"{'loss': 0.2106, 'learning_rate': 1.2e-05, 'epoch': 1.5}\n",
" 40% 1000/2500 [02:43<03:41, 6.78it/s][INFO|trainer.py:725] 2023-02-14 21:51:51,748 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:51:51,749 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:51:51,750 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:51:51,750 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 28.08it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.96it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.63it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.99it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.68it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.48it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.32it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.23it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.15it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.05it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.05it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.03it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
"{'eval_loss': 0.19490236043930054, 'eval_accuracy': 0.9259999990463257, 'eval_runtime': 3.9658, 'eval_samples_per_second': 504.311, 'eval_steps_per_second': 21.181, 'epoch': 1.5}\n",
"\n",
" 40% 1000/2500 [02:46<03:41, 6.78it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:51:55,716 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:51:55,717 >> Configuration saved in out/emotion/gpt2/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:51:56,708 >> Model weights saved in out/emotion/gpt2/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:51:56,709 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:51:56,709 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.1906, 'learning_rate': 1.1200000000000001e-05, 'epoch': 1.65}\n",
"{'loss': 0.1793, 'learning_rate': 1.04e-05, 'epoch': 1.8}\n",
" 50% 1250/2500 [03:26<03:04, 6.76it/s][INFO|trainer.py:725] 2023-02-14 21:52:35,220 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:52:35,222 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:52:35,222 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:52:35,222 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.99it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.91it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.61it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 22.00it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.66it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.45it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.34it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.26it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.21it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.17it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.11it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.11it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.06it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.08it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.09it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.09it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
"{'eval_loss': 0.1607103943824768, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 3.9612, 'eval_samples_per_second': 504.895, 'eval_steps_per_second': 21.206, 'epoch': 1.87}\n",
"\n",
" 50% 1250/2500 [03:30<03:04, 6.76it/s]\n",
"{'loss': 0.2116, 'learning_rate': 9.600000000000001e-06, 'epoch': 1.95}\n",
"{'loss': 0.1536, 'learning_rate': 8.8e-06, 'epoch': 2.1}\n",
"{'loss': 0.1518, 'learning_rate': 8.000000000000001e-06, 'epoch': 2.25}\n",
" 60% 1500/2500 [04:07<02:26, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:53:15,831 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:53:15,833 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:53:15,833 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:53:15,833 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 28.10it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.90it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.58it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.53it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.37it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.27it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.19it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.13it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.11it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.04it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.94it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.94it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.98it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.93it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.98it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
"{'eval_loss': 0.160899356007576, 'eval_accuracy': 0.9330000281333923, 'eval_runtime': 3.9773, 'eval_samples_per_second': 502.855, 'eval_steps_per_second': 21.12, 'epoch': 2.25}\n",
"\n",
" 60% 1500/2500 [04:11<02:26, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:53:19,811 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:53:19,812 >> Configuration saved in out/emotion/gpt2/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:53:21,455 >> Model weights saved in out/emotion/gpt2/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:53:21,456 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:53:21,456 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.157, 'learning_rate': 7.2000000000000005e-06, 'epoch': 2.4}\n",
"{'loss': 0.141, 'learning_rate': 6.4000000000000006e-06, 'epoch': 2.55}\n",
" 70% 1750/2500 [04:51<01:50, 6.80it/s][INFO|trainer.py:725] 2023-02-14 21:54:00,007 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:54:00,009 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:54:00,009 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:54:00,009 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.89it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.82it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.49it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.48it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.31it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.20it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.99it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.98it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.98it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
"{'eval_loss': 0.15204769372940063, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 3.9769, 'eval_samples_per_second': 502.901, 'eval_steps_per_second': 21.122, 'epoch': 2.62}\n",
"\n",
" 70% 1750/2500 [04:55<01:50, 6.80it/s]\n",
"{'loss': 0.1426, 'learning_rate': 5.600000000000001e-06, 'epoch': 2.7}\n",
"{'loss': 0.1463, 'learning_rate': 4.800000000000001e-06, 'epoch': 2.85}\n",
"{'loss': 0.1403, 'learning_rate': 4.000000000000001e-06, 'epoch': 3.0}\n",
" 80% 2000/2500 [05:31<01:13, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:54:40,633 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:54:40,635 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:54:40,635 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:54:40,635 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.95it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.86it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.54it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.95it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.60it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.42it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.29it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.05it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.06it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.04it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.94it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
"{'eval_loss': 0.14609387516975403, 'eval_accuracy': 0.9290000200271606, 'eval_runtime': 3.9774, 'eval_samples_per_second': 502.846, 'eval_steps_per_second': 21.12, 'epoch': 3.0}\n",
"\n",
" 80% 2000/2500 [05:35<01:13, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:54:44,614 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:54:44,615 >> Configuration saved in out/emotion/gpt2/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:54:46,838 >> Model weights saved in out/emotion/gpt2/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:54:46,839 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:54:46,839 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2000/special_tokens_map.json\n",
"{'loss': 0.1256, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.15}\n",
"{'loss': 0.1246, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.3}\n",
" 90% 2250/2500 [06:16<00:36, 6.76it/s][INFO|trainer.py:725] 2023-02-14 21:55:25,309 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:55:25,311 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:55:25,311 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:55:25,311 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.89it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.86it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.52it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.87it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.57it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.29it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.18it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.15it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.09it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.06it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.06it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
"{'eval_loss': 0.15553689002990723, 'eval_accuracy': 0.9294999837875366, 'eval_runtime': 3.967, 'eval_samples_per_second': 504.158, 'eval_steps_per_second': 21.175, 'epoch': 3.37}\n",
"\n",
" 90% 2250/2500 [06:20<00:36, 6.76it/s]\n",
"{'loss': 0.1174, 'learning_rate': 1.6000000000000001e-06, 'epoch': 3.45}\n",
"{'loss': 0.1374, 'learning_rate': 8.000000000000001e-07, 'epoch': 3.6}\n",
"{'loss': 0.1207, 'learning_rate': 0.0, 'epoch': 3.75}\n",
"100% 2500/2500 [06:57<00:00, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:56:05,969 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:05,971 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:05,971 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:05,971 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.94it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.89it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.60it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.97it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.57it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.34it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.23it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.06it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.45it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.64it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.77it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.84it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.92it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
"{'eval_loss': 0.15162073075771332, 'eval_accuracy': 0.9309999942779541, 'eval_runtime': 3.9841, 'eval_samples_per_second': 501.992, 'eval_steps_per_second': 21.084, 'epoch': 3.75}\n",
"\n",
"100% 2500/2500 [07:01<00:00, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:56:09,956 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:56:09,957 >> Configuration saved in out/emotion/gpt2/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:56:10,953 >> Model weights saved in out/emotion/gpt2/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:56:10,954 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:56:10,954 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2500/special_tokens_map.json\n",
"[INFO|trainer.py:1852] 2023-02-14 21:56:12,777 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 21:56:12,778 >> Loading best model from out/emotion/gpt2/checkpoint-1500 (score: 0.9330000281333923).\n",
"{'train_runtime': 424.4983, 'train_samples_per_second': 141.343, 'train_steps_per_second': 5.889, 'train_loss': 0.351297896194458, 'epoch': 3.75}\n",
"100% 2500/2500 [07:04<00:00, 5.89it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:56:13,218 >> Saving model checkpoint to out/emotion/gpt2\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:56:13,220 >> Configuration saved in out/emotion/gpt2/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:56:14,063 >> Model weights saved in out/emotion/gpt2/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:56:14,064 >> tokenizer config file saved in out/emotion/gpt2/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:56:14,064 >> Special tokens file saved in out/emotion/gpt2/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 3.75\n",
" train_loss = 0.3513\n",
" train_runtime = 0:07:04.49\n",
" train_samples = 16000\n",
" train_samples_per_second = 141.343\n",
" train_steps_per_second = 5.889\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:14,169 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:14,170 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:14,170 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:14,170 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 21.20it/s]\n",
"***** eval metrics *****\n",
" epoch = 3.75\n",
" eval_accuracy = 0.933\n",
" eval_loss = 0.1609\n",
" eval_runtime = 0:00:04.02\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 497.496\n",
" eval_steps_per_second = 20.895\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:18,194 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:18,195 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:18,195 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:18,195 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 21.40it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:56:22,304 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9330000281333923}]}\n"
]
}
],
"source": [
"!python run_glue.py \\\n",
" --cache_dir gtp_cache_training \\\n",
" --model_name_or_path gpt2 \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/gpt2 \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
]
},
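  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal inference sketch for the checkpoint saved above (an illustrative assumption, not something executed as part of the training run): it loads the fine-tuned model from `out/emotion/gpt2` and maps the generic `LABEL_0`..`LABEL_5` ids back to the emotion names.\n",
    "\n",
    "```python\n",
    "from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline\n",
    "\n",
    "# hypothetical usage of the checkpoint produced by the cell above\n",
    "emotions = [\"sadness\", \"joy\", \"love\", \"anger\", \"fear\", \"surprise\"]\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"out/emotion/gpt2\")\n",
    "model = AutoModelForSequenceClassification.from_pretrained(\"out/emotion/gpt2\")\n",
    "clf = pipeline(\"text-classification\", model=model, tokenizer=tokenizer)\n",
    "\n",
    "pred = clf(\"i feel hopeful and powerful\")[0]  # e.g. {'label': 'LABEL_1', 'score': ...}\n",
    "print(emotions[int(pred[\"label\"].split(\"_\")[-1])], pred[\"score\"])\n",
    "```"
   ]
  },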
{
"cell_type": "markdown",
"source": [
"- full dataset\n",
"- custom head"
],
"metadata": {
"id": "zJeUGay5n1JW"
}
},
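  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The custom head itself is defined in the project's modified `run_glue.py` (class `GPT2ForSequenceClassificationCustom`). The sketch below is only an assumption of what such a head could look like, reconstructed from the freshly initialized parameter names reported in the training log (`score.dense_1_input`, `score.dense_1_hidden`, `score.dense_2`, `score.out_proj`); the layer sizes and activations are guesses.\n",
    "\n",
    "```python\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "\n",
    "\n",
    "class GPT2ClassificationHeadCustom(nn.Module):\n",
    "    # Hypothetical multi-layer head replacing the single `score` linear layer.\n",
    "    def __init__(self, hidden_size=768, num_labels=6, dropout=0.1):\n",
    "        super().__init__()\n",
    "        self.dense_1_input = nn.Linear(hidden_size, 2 * hidden_size)\n",
    "        self.dense_1_hidden = nn.Linear(hidden_size, 2 * hidden_size)\n",
    "        self.dense_2 = nn.Linear(4 * hidden_size, hidden_size)\n",
    "        self.dropout = nn.Dropout(dropout)\n",
    "        self.out_proj = nn.Linear(hidden_size, num_labels, bias=False)\n",
    "\n",
    "    def forward(self, last_hidden_state, input_embeds):\n",
    "        # combine transformer outputs with token embeddings, then project to the labels\n",
    "        x = torch.cat(\n",
    "            (self.dense_1_hidden(last_hidden_state), self.dense_1_input(input_embeds)), dim=-1\n",
    "        )\n",
    "        x = self.dropout(torch.relu(x))\n",
    "        x = self.dropout(torch.relu(self.dense_2(x)))\n",
    "        return self.out_proj(x)\n",
    "\n",
    "\n",
    "head = GPT2ClassificationHeadCustom()\n",
    "h = torch.randn(2, 128, 768)   # dummy hidden states (batch, seq, n_embd)\n",
    "print(head(h, h).shape)        # torch.Size([2, 128, 6])\n",
    "```"
   ]
  },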
{
"cell_type": "code",
"source": [
"!python run_glue.py \\\n",
" --cache_dir gtp_custom_cache_training \\\n",
" --model_name_or_path gpt2 \\\n",
" --custom_model gpt2_custom \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/gpt2_custom \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LXRMDiD-n1nG",
"outputId": "1383e6a3-b485-49a0-d111-05bea71acd23"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:56:25.884599: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:56:26.040127: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:56:26.823479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:56:26.823595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:56:26.823615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/gpt2_custom,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/gpt2_custom,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 14138.10it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2175.09it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 672.49it/s]\n",
"Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 123kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:30,068 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:30,068 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:56:30,162 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:30,251 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:30,252 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.18MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.90MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 14.3MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file vocab.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file merges.txt from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file tokenizer.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:31,525 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:31,526 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: False\n",
"INFO:__main__:Using implementation from class: GPT2ForSequenceClassificationCustom\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 108MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:56:36,895 >> loading weights file pytorch_model.bin from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 21:56:39,410 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
"\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:56:39,410 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_hidden.bias', 'score.dense_1_input.weight', 'score.dense_2.bias', 'score.dense_2.weight', 'score.out_proj.weight', 'score.dense_1_hidden.weight', 'score.dense_1_input.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"[ERROR|tokenization_utils_base.py:1019] 2023-02-14 21:56:39,418 >> Using pad_token, but it is not set yet.\n",
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 19.61ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.48ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.71ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:503] 2023-02-14 21:56:42,941 >> max_steps is given, it will override any value given in num_train_epochs\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:42,941 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:56:42,947 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:56:42,947 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:56:42,947 >> Num Epochs = 4\n",
"[INFO|trainer.py:1610] 2023-02-14 21:56:42,947 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:56:42,947 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:56:42,947 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:56:42,947 >> Total optimization steps = 2500\n",
"{'loss': 1.6218, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.15}\n",
"{'loss': 1.1593, 'learning_rate': 1.8400000000000003e-05, 'epoch': 0.3}\n",
" 10% 250/2500 [00:39<05:43, 6.56it/s][INFO|trainer.py:725] 2023-02-14 21:57:22,025 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:57:22,027 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:57:22,027 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:57:22,027 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.97it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 22.99it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.18it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.66it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.55it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.44it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.32it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.27it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
"{'eval_loss': 0.6981180310249329, 'eval_accuracy': 0.7329999804496765, 'eval_runtime': 4.1201, 'eval_samples_per_second': 485.426, 'eval_steps_per_second': 20.388, 'epoch': 0.37}\n",
"\n",
" 10% 250/2500 [00:43<05:43, 6.56it/s]\n",
"{'loss': 0.8016, 'learning_rate': 1.76e-05, 'epoch': 0.45}\n",
"{'loss': 0.5481, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.6}\n",
"{'loss': 0.4045, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.75}\n",
" 20% 500/2500 [01:21<05:03, 6.58it/s][INFO|trainer.py:725] 2023-02-14 21:58:04,246 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:58:04,248 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:58:04,248 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:58:04,248 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.97it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.02it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.20it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.19it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.20it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.21it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.23it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.23it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.24it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.25it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.24it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.24it/s]\u001b[A\n",
"{'eval_loss': 0.29522550106048584, 'eval_accuracy': 0.9100000262260437, 'eval_runtime': 4.1309, 'eval_samples_per_second': 484.153, 'eval_steps_per_second': 20.334, 'epoch': 0.75}\n",
"\n",
" 20% 500/2500 [01:25<05:03, 6.58it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:58:08,380 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:58:08,381 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:58:09,983 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:58:09,984 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:58:09,984 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.356, 'learning_rate': 1.5200000000000002e-05, 'epoch': 0.9}\n",
"{'loss': 0.2714, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.05}\n",
" 30% 750/2500 [02:07<04:25, 6.59it/s][INFO|trainer.py:725] 2023-02-14 21:58:49,972 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:58:49,973 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:58:49,974 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:58:49,974 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.11it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.25it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.89it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.67it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.56it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.48it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.42it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.37it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.32it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.27it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.31it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
"{'eval_loss': 0.22870442271232605, 'eval_accuracy': 0.9200000166893005, 'eval_runtime': 4.1118, 'eval_samples_per_second': 486.403, 'eval_steps_per_second': 20.429, 'epoch': 1.12}\n",
"\n",
" 30% 750/2500 [02:11<04:25, 6.59it/s]\n",
"{'loss': 0.2332, 'learning_rate': 1.3600000000000002e-05, 'epoch': 1.2}\n",
"{'loss': 0.2135, 'learning_rate': 1.2800000000000001e-05, 'epoch': 1.35}\n",
"{'loss': 0.2283, 'learning_rate': 1.2e-05, 'epoch': 1.5}\n",
" 40% 1000/2500 [02:49<03:48, 6.57it/s][INFO|trainer.py:725] 2023-02-14 21:59:32,169 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:59:32,170 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:59:32,170 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:59:32,171 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.03it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.07it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.17it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.84it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.62it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.52it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.36it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.33it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.28it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.14it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.18it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.31it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.25it/s]\u001b[A\n",
"{'eval_loss': 0.16501356661319733, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 4.1217, 'eval_samples_per_second': 485.232, 'eval_steps_per_second': 20.38, 'epoch': 1.5}\n",
"\n",
" 40% 1000/2500 [02:53<03:48, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:59:36,293 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:59:36,294 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:59:37,744 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:59:37,744 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:59:37,744 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.1836, 'learning_rate': 1.1200000000000001e-05, 'epoch': 1.65}\n",
"{'loss': 0.1844, 'learning_rate': 1.04e-05, 'epoch': 1.8}\n",
" 50% 1250/2500 [03:34<03:09, 6.59it/s][INFO|trainer.py:725] 2023-02-14 22:00:17,827 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:00:17,829 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:00:17,829 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:00:17,829 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.06it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.79it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.21it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.88it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.55it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.47it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.27it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.28it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.26it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.26it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
"{'eval_loss': 0.15909001231193542, 'eval_accuracy': 0.9355000257492065, 'eval_runtime': 4.1177, 'eval_samples_per_second': 485.712, 'eval_steps_per_second': 20.4, 'epoch': 1.87}\n",
"\n",
" 50% 1250/2500 [03:38<03:09, 6.59it/s]\n",
"{'loss': 0.2181, 'learning_rate': 9.600000000000001e-06, 'epoch': 1.95}\n",
"{'loss': 0.1695, 'learning_rate': 8.8e-06, 'epoch': 2.1}\n",
"{'loss': 0.1683, 'learning_rate': 8.000000000000001e-06, 'epoch': 2.25}\n",
" 60% 1500/2500 [04:17<02:32, 6.55it/s][INFO|trainer.py:725] 2023-02-14 22:00:59,986 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:00:59,988 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:00:59,988 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:00:59,988 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.10it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.06it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.79it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.16it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.52it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.45it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.24it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.11it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.12it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.17it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.19it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.20it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.20it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.20it/s]\u001b[A\n",
"{'eval_loss': 0.1472882628440857, 'eval_accuracy': 0.934499979019165, 'eval_runtime': 4.13, 'eval_samples_per_second': 484.258, 'eval_steps_per_second': 20.339, 'epoch': 2.25}\n",
"\n",
" 60% 1500/2500 [04:21<02:32, 6.55it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:01:04,119 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:01:04,120 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:01:05,576 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:01:05,576 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:01:05,576 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.1497, 'learning_rate': 7.2000000000000005e-06, 'epoch': 2.4}\n",
"{'loss': 0.1496, 'learning_rate': 6.4000000000000006e-06, 'epoch': 2.55}\n",
" 70% 1750/2500 [05:02<01:54, 6.54it/s][INFO|trainer.py:725] 2023-02-14 22:01:45,617 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:01:45,618 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:01:45,619 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:01:45,619 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.78it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 22.79it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.58it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.03it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.70it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.49it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.30it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.19it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.16it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.15it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.14it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.12it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.09it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.08it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.10it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.13it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.19it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.25it/s]\u001b[A\n",
"{'eval_loss': 0.14743593335151672, 'eval_accuracy': 0.9359999895095825, 'eval_runtime': 4.1413, 'eval_samples_per_second': 482.944, 'eval_steps_per_second': 20.284, 'epoch': 2.62}\n",
"\n",
" 70% 1750/2500 [05:06<01:54, 6.54it/s]\n",
"{'loss': 0.1465, 'learning_rate': 5.600000000000001e-06, 'epoch': 2.7}\n",
"{'loss': 0.1376, 'learning_rate': 4.800000000000001e-06, 'epoch': 2.85}\n",
"{'loss': 0.1444, 'learning_rate': 4.000000000000001e-06, 'epoch': 3.0}\n",
" 80% 2000/2500 [05:44<01:16, 6.57it/s][INFO|trainer.py:725] 2023-02-14 22:02:27,845 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:02:27,846 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:02:27,846 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:02:27,846 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.04it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.04it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.75it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.18it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.85it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.61it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.49it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.43it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.14it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.16it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.21it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.22it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.19it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.20it/s]\u001b[A\n",
"{'eval_loss': 0.14364145696163177, 'eval_accuracy': 0.9365000128746033, 'eval_runtime': 4.1279, 'eval_samples_per_second': 484.505, 'eval_steps_per_second': 20.349, 'epoch': 3.0}\n",
"\n",
" 80% 2000/2500 [05:49<01:16, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:02:31,975 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:02:31,976 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:02:33,429 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:02:33,430 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:02:33,430 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-2000/special_tokens_map.json\n",
"{'loss': 0.104, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.15}\n",
"{'loss': 0.1206, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.3}\n",
" 90% 2250/2500 [06:30<00:38, 6.55it/s][INFO|trainer.py:725] 2023-02-14 22:03:13,484 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:03:13,486 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:03:13,486 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:03:13,486 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.11it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.10it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.81it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.22it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.88it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.68it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.56it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.47it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.41it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.38it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.33it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.26it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.17it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.21it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.21it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
"{'eval_loss': 0.15543130040168762, 'eval_accuracy': 0.9369999766349792, 'eval_runtime': 4.1171, 'eval_samples_per_second': 485.782, 'eval_steps_per_second': 20.403, 'epoch': 3.37}\n",
"\n",
" 90% 2250/2500 [06:34<00:38, 6.55it/s]\n",
"{'loss': 0.1289, 'learning_rate': 1.6000000000000001e-06, 'epoch': 3.45}\n",
"{'loss': 0.1231, 'learning_rate': 8.000000000000001e-07, 'epoch': 3.6}\n",
"{'loss': 0.1179, 'learning_rate': 0.0, 'epoch': 3.75}\n",
"100% 2500/2500 [07:12<00:00, 6.57it/s][INFO|trainer.py:725] 2023-02-14 22:03:55,704 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:03:55,705 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:03:55,705 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:03:55,706 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.11it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.81it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.13it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.82it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.47it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.41it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.38it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.35it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.35it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.30it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.31it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
"{'eval_loss': 0.14437170326709747, 'eval_accuracy': 0.9350000023841858, 'eval_runtime': 4.116, 'eval_samples_per_second': 485.915, 'eval_steps_per_second': 20.408, 'epoch': 3.75}\n",
"\n",
"100% 2500/2500 [07:16<00:00, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:03:59,822 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:03:59,823 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:04:00,568 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:04:00,569 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:04:00,569 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-2500/special_tokens_map.json\n",
"[INFO|trainer.py:1852] 2023-02-14 22:04:02,582 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 22:04:02,582 >> Loading best model from out/emotion/gpt2_custom/checkpoint-2000 (score: 0.9365000128746033).\n",
"{'train_runtime': 440.0758, 'train_samples_per_second': 136.34, 'train_steps_per_second': 5.681, 'train_loss': 0.32335229415893557, 'epoch': 3.75}\n",
"100% 2500/2500 [07:20<00:00, 5.68it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 22:04:03,025 >> Saving model checkpoint to out/emotion/gpt2_custom\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:04:03,026 >> Configuration saved in out/emotion/gpt2_custom/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:04:03,965 >> Model weights saved in out/emotion/gpt2_custom/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:04:03,966 >> tokenizer config file saved in out/emotion/gpt2_custom/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:04:03,966 >> Special tokens file saved in out/emotion/gpt2_custom/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 3.75\n",
" train_loss = 0.3234\n",
" train_runtime = 0:07:20.07\n",
" train_samples = 16000\n",
" train_samples_per_second = 136.34\n",
" train_steps_per_second = 5.681\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 22:04:04,068 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:04:04,069 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:04,069 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:04,070 >> Batch size = 24\n",
"100% 84/84 [00:04<00:00, 20.35it/s]\n",
"***** eval metrics *****\n",
" epoch = 3.75\n",
" eval_accuracy = 0.9365\n",
" eval_loss = 0.1436\n",
" eval_runtime = 0:00:04.18\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 477.778\n",
" eval_steps_per_second = 20.067\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 22:04:08,259 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:04:08,260 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:08,260 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:08,260 >> Batch size = 24\n",
"100% 84/84 [00:04<00:00, 20.62it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 22:04:12,537 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9365000128746033}]}\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "VrHmnOaT7ICl"
},
"source": [
"## **T5**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CmuDde477ICl"
},
"source": [
"- full data\n",
"- model `T5`\n",
"- sequnece length: 128\n",
"- training epoch: 1\n",
"- first few layers frozen"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"id": "2ruXjeqj7ICl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4d73b407-08c3-4007-aa32-c8709dd696fa"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 22:04:17.129470: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 22:04:17.281426: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 22:04:18.087509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 22:04:18.087605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 22:04:18.087624: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters Seq2SeqTrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"generation_max_length=128,\n",
"generation_num_beams=None,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=5e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/t5_v1_1/runs/Feb14_22-04-20_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/t5_v1_1,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=8,\n",
"per_device_train_batch_size=8,\n",
"predict_with_generate=True,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/t5_v1_1,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"sortish_sampler=False,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"WARNING:datasets.builder:Using custom data configuration default-a82ca4164dba097e\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 11848.32it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2097.85it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 953.83it/s]\n",
"Downloading (…)lve/main/config.json: 100% 537/537 [00:00<00:00, 97.0kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:20,972 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:20,975 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)okenizer_config.json: 100% 1.86k/1.86k [00:00<00:00, 853kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,160 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,160 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)ve/main/spiece.model: 100% 792k/792k [00:00<00:00, 10.2MB/s]\n",
"Downloading (…)cial_tokens_map.json: 100% 1.79k/1.79k [00:00<00:00, 705kB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file spiece.model from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file tokenizer.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file special_tokens_map.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file tokenizer_config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,838 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,838 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,888 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,889 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 308M/308M [00:03<00:00, 84.8MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 22:04:26,050 >> loading weights file pytorch_model.bin from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 22:04:27,048 >> All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
"\n",
"[INFO|modeling_utils.py:2614] 2023-02-14 22:04:27,048 >> All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at google/t5-v1_1-small.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('encoder.block.1.layer.0.SelfAttention.q.weight', False), ('encoder.block.1.layer.0.SelfAttention.k.weight', False), ('encoder.block.1.layer.0.SelfAttention.v.weight', False), ('encoder.block.1.layer.0.SelfAttention.o.weight', False), ('encoder.block.1.layer.0.layer_norm.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.1.layer.1.layer_norm.weight', False), ('encoder.block.2.layer.0.SelfAttention.q.weight', False), ('encoder.block.2.layer.0.SelfAttention.k.weight', False), ('encoder.block.2.layer.0.SelfAttention.v.weight', False), ('encoder.block.2.layer.0.SelfAttention.o.weight', False), ('encoder.block.2.layer.0.layer_norm.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.2.layer.1.layer_norm.weight', False), ('encoder.block.3.layer.0.SelfAttention.q.weight', False), ('encoder.block.3.layer.0.SelfAttention.k.weight', False), ('encoder.block.3.layer.0.SelfAttention.v.weight', False), ('encoder.block.3.layer.0.SelfAttention.o.weight', False), ('encoder.block.3.layer.0.layer_norm.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.3.layer.1.layer_norm.weight', False), ('encoder.block.4.layer.0.SelfAttention.q.weight', False), ('encoder.block.4.layer.0.SelfAttention.k.weight', False), ('encoder.block.4.layer.0.SelfAttention.v.weight', False), ('encoder.block.4.layer.0.SelfAttention.o.weight', False), ('encoder.block.4.layer.0.layer_norm.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.4.layer.1.layer_norm.weight', False), ('encoder.block.5.layer.0.SelfAttention.q.weight', False), ('encoder.block.5.layer.0.SelfAttention.k.weight', False), ('encoder.block.5.layer.0.SelfAttention.v.weight', False), ('encoder.block.5.layer.0.SelfAttention.o.weight', False), ('encoder.block.5.layer.0.layer_norm.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.5.layer.1.layer_norm.weight', False), ('encoder.block.6.layer.0.SelfAttention.q.weight', False), ('encoder.block.6.layer.0.SelfAttention.k.weight', False), ('encoder.block.6.layer.0.SelfAttention.v.weight', False), ('encoder.block.6.layer.0.SelfAttention.o.weight', False), ('encoder.block.6.layer.0.layer_norm.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.6.layer.1.layer_norm.weight', False), ('encoder.block.7.layer.0.SelfAttention.q.weight', False), ('encoder.block.7.layer.0.SelfAttention.k.weight', False), ('encoder.block.7.layer.0.SelfAttention.v.weight', False), ('encoder.block.7.layer.0.SelfAttention.o.weight', False), ('encoder.block.7.layer.0.layer_norm.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wi_0.weight', False), 
('encoder.block.7.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.7.layer.1.layer_norm.weight', False)] \n",
"\n",
"\n",
"INFO:__main__:Using translation prefix: \"emotion classification: \"\n",
"Running tokenizer on train dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-fa17416eabe18767.arrow\n",
"Running tokenizer on train dataset: 100% 16/16 [00:00<00:00, 23.64ba/s]\n",
"Running tokenizer on validation dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-c6cebbf9290f7df0.arrow\n",
"Running tokenizer on validation dataset: 100% 2/2 [00:00<00:00, 33.01ba/s]\n",
"Running tokenizer on prediction dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-988bff0993eee389.arrow\n",
"Running tokenizer on prediction dataset: 100% 2/2 [00:00<00:00, 33.06ba/s]\n",
"[INFO|trainer.py:503] 2023-02-14 22:04:30,902 >> max_steps is given, it will override any value given in num_train_epochs\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 22:04:30,911 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 22:04:30,911 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 22:04:30,911 >> Num Epochs = 2\n",
"[INFO|trainer.py:1610] 2023-02-14 22:04:30,911 >> Instantaneous batch size per device = 8\n",
"[INFO|trainer.py:1611] 2023-02-14 22:04:30,911 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n",
"[INFO|trainer.py:1612] 2023-02-14 22:04:30,911 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 22:04:30,911 >> Total optimization steps = 2500\n",
" 0% 0/2500 [00:00<?, ?it/s][WARNING|logging.py:281] 2023-02-14 22:04:30,925 >> You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
"{'loss': 21.5908, 'learning_rate': 4.8e-05, 'epoch': 0.05}\n",
"{'loss': 14.8264, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.1}\n",
" 10% 249/2500 [00:24<03:31, 10.64it/s][INFO|trainer.py:2907] 2023-02-14 22:04:55,366 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:55,366 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:55,366 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.87it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.90it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.84it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.48it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.16it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.99it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.61it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.67it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.63it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.67it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.49it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.42it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.46it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.50it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.57it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.68it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.70it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.71it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.65it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.71it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.73it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.58it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.50it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.56it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.58it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:10, 14.39it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:10, 14.35it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.47it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.45it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.40it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.44it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.52it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.53it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.55it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.66it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.78it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.78it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.73it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.79it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.61it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.37it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.37it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.45it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.46it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.55it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.56it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.60it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.21it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.41it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.53it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.60it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.72it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.74it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.80it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.86it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.80it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.83it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.78it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.78it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.79it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.86it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.89it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.83it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.80it/s]\u001b[A\n",
" 10% 250/2500 [00:39<03:31, 10.64it/s]\n",
" 90% 226/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.81it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.86it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.84it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.77it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
"{'eval_loss': 9.001160621643066, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2175, 'eval_samples_per_second': 116.161, 'eval_steps_per_second': 14.52, 'epoch': 0.12}\n",
"\n",
" 10% 250/2500 [00:41<03:31, 10.64it/s]\n",
"{'loss': 10.5792, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.15}\n",
"{'loss': 7.8113, 'learning_rate': 4.2e-05, 'epoch': 0.2}\n",
"{'loss': 5.2658, 'learning_rate': 4e-05, 'epoch': 0.25}\n",
" 20% 500/2500 [01:05<03:04, 10.83it/s][INFO|trainer.py:2907] 2023-02-14 22:05:35,963 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:05:35,963 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:05:35,963 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 22.27it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.12it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.18it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.53it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.20it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.89it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.91it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.77it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.79it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.68it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.68it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.63it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.74it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.76it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.84it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.68it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.79it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.74it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.77it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.80it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:09, 14.81it/s]\u001b[A\n",
" 42% 104/250 [00:06<00:09, 14.81it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.86it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.87it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.87it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.85it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.73it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.74it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.77it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.49it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.60it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.63it/s]\u001b[A\n",
" 59% 148/250 [00:09<00:06, 14.62it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.61it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.75it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.67it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.68it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.61it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.58it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.64it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.60it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.49it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.47it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.53it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.57it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.58it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.64it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.30it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.43it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.54it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:02, 14.68it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.75it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.77it/s]\u001b[A\n",
" 20% 500/2500 [01:19<03:04, 10.83it/s]\n",
" 87% 218/250 [00:14<00:02, 14.84it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.87it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.79it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.69it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.64it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.54it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.60it/s]\u001b[A\n",
" 94% 236/250 [00:15<00:00, 14.66it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.76it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.76it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
"{'eval_loss': 2.1697170734405518, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.1551, 'eval_samples_per_second': 116.584, 'eval_steps_per_second': 14.573, 'epoch': 0.25}\n",
"\n",
" 20% 500/2500 [01:22<03:04, 10.83it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:05:53,119 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:05:53,120 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:05:53,749 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:05:53,750 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:05:53,750 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:05:53,788 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-500/spiece.model\n",
"{'loss': 3.7795, 'learning_rate': 3.8e-05, 'epoch': 0.3}\n",
"{'loss': 2.9169, 'learning_rate': 3.6e-05, 'epoch': 0.35}\n",
" 30% 749/2500 [01:47<02:43, 10.71it/s][INFO|trainer.py:2907] 2023-02-14 22:06:18,135 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:06:18,136 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:06:18,136 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.21it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.54it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.62it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:16, 14.78it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.60it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.53it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:16, 14.44it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.57it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 12% 30/250 [00:02<00:15, 14.66it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.67it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.58it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.65it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.85it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.84it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.80it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:12, 14.77it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.81it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.76it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.71it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.64it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.71it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.74it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.76it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.75it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.72it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.70it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.72it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.66it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.70it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.63it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.71it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.73it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.41it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.46it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.51it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.60it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.50it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.53it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:07, 14.55it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.53it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.48it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.54it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.46it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.42it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.38it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.38it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.32it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.33it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.23it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.23it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.24it/s]\u001b[A\n",
" 70% 176/250 [00:12<00:05, 14.21it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:05, 14.17it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.16it/s]\u001b[A\n",
" 30% 750/2500 [01:59<02:43, 10.71it/s]\n",
" 74% 184/250 [00:12<00:04, 14.30it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.40it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.40it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.48it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.56it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.62it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.68it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.72it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.71it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.75it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.73it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.65it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.64it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.26it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.42it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.45it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.54it/s]\u001b[A\n",
"{'eval_loss': 1.4527522325515747, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2954, 'eval_samples_per_second': 115.638, 'eval_steps_per_second': 14.455, 'epoch': 0.38}\n",
"\n",
" 30% 750/2500 [02:04<02:43, 10.71it/s]\n",
"{'loss': 2.4516, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.4}\n",
"{'loss': 2.2293, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.45}\n",
"{'loss': 2.0123, 'learning_rate': 3e-05, 'epoch': 0.5}\n",
" 40% 1000/2500 [02:27<02:21, 10.63it/s][INFO|trainer.py:2907] 2023-02-14 22:06:58,636 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:06:58,636 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:06:58,636 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:12, 20.13it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:15, 16.26it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.45it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.09it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:16, 14.85it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.66it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.56it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.77it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.88it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.83it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:14, 14.94it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:14, 14.94it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.96it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.80it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.82it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.73it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.19it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.44it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.79it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.79it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.83it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.89it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:11, 14.88it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.83it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.83it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.83it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.81it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.78it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.78it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.85it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.79it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.70it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.72it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.79it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.72it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.84it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.82it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.85it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.85it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.85it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.87it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.88it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.81it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.79it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.81it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.84it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.84it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.83it/s]\u001b[A\n",
" 59% 148/250 [00:09<00:06, 14.83it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.76it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.77it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.77it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.80it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.70it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.68it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.75it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.82it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.87it/s]\u001b[A\n",
" 40% 1000/2500 [02:39<02:21, 10.63it/s]\n",
" 71% 178/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.65it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.61it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.63it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.72it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.67it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.74it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.64it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.69it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 94% 236/250 [00:15<00:00, 14.73it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.87it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.88it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.90it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.91it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.90it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.92it/s]\u001b[A\n",
"{'eval_loss': 1.160749912261963, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.1471, 'eval_samples_per_second': 116.638, 'eval_steps_per_second': 14.58, 'epoch': 0.5}\n",
"\n",
" 40% 1000/2500 [02:44<02:21, 10.63it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:07:15,784 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:07:15,785 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:07:16,414 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:07:16,415 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:07:16,416 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-1000/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:07:16,453 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-1000/spiece.model\n",
"{'loss': 1.9003, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.55}\n",
"{'loss': 1.7884, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.6}\n",
" 50% 1249/2500 [03:09<01:59, 10.49it/s][INFO|trainer.py:2907] 2023-02-14 22:07:40,879 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:07:40,879 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:07:40,879 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.99it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.06it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 16.09it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.50it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.84it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.74it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.69it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.74it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.73it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.71it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.62it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.56it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.75it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.73it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.68it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.63it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.74it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.68it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.64it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.68it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.29it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.38it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:12, 14.47it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.72it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.75it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.64it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.67it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.71it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.84it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:09, 14.50it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.51it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.63it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.69it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.67it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.62it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.60it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.58it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.55it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.58it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.57it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.59it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.66it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.53it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.60it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.52it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.50it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.49it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.47it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.37it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:05, 14.29it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.27it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.25it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.27it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.24it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.18it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.22it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:04, 14.16it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.21it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.22it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.27it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.28it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.16it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.06it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.05it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.06it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.06it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 13.87it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.01it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.22it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.28it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.42it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.39it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.35it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.49it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.57it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.65it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 94% 234/250 [00:16<00:01, 14.73it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.74it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.80it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.83it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.81it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.72it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.63it/s]\u001b[A\n",
"{'eval_loss': 1.0410572290420532, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.3319, 'eval_samples_per_second': 115.394, 'eval_steps_per_second': 14.424, 'epoch': 0.62}\n",
"\n",
" 50% 1250/2500 [03:27<01:59, 10.49it/s]\n",
"{'loss': 1.7415, 'learning_rate': 2.4e-05, 'epoch': 0.65}\n",
"{'loss': 1.6231, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.7}\n",
"{'loss': 1.5278, 'learning_rate': 2e-05, 'epoch': 0.75}\n",
" 60% 1500/2500 [03:50<01:33, 10.71it/s][INFO|trainer.py:2907] 2023-02-14 22:08:21,432 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:08:21,433 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:08:21,433 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.79it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.88it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.94it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.36it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 14.98it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.72it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.47it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:16, 14.40it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.48it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.54it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.57it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.52it/s]\u001b[A\n",
" 12% 30/250 [00:02<00:15, 14.53it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:15, 14.51it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.51it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.51it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.58it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.66it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.75it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.72it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:11, 14.74it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.59it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.80it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.82it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.80it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.81it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.74it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.80it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.79it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.76it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.82it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.82it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.72it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.73it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.78it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.78it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.85it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.82it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.85it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.89it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.85it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.56it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.78it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.85it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.87it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.91it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.91it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.54it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.59it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.50it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.49it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.66it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.57it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.56it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.58it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.56it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.55it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.49it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.42it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.39it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.40it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.35it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.37it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.43it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.44it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.44it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.48it/s]\u001b[A\n",
"{'eval_loss': 0.9458380341529846, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.379, 'eval_samples_per_second': 115.081, 'eval_steps_per_second': 14.385, 'epoch': 0.75}\n",
"\n",
" 60% 1500/2500 [04:07<01:33, 10.71it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:08:38,813 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:08:38,814 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:08:39,285 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:08:39,286 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:08:39,286 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-1500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:08:39,322 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-1500/spiece.model\n",
"{'loss': 1.4835, 'learning_rate': 1.8e-05, 'epoch': 0.8}\n",
"{'loss': 1.449, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.85}\n",
" 70% 1749/2500 [04:32<01:10, 10.61it/s][INFO|trainer.py:2907] 2023-02-14 22:09:03,363 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:09:03,363 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:09:03,363 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 22.10it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.10it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.16it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.48it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.17it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.90it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.59it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.58it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.59it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.60it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:15, 14.49it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.45it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.39it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.44it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.41it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.46it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.46it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.47it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.51it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.65it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.75it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.58it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.53it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.61it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.75it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.83it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.77it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.76it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.68it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.60it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.58it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.63it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.67it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.66it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.66it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.66it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.52it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.55it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.62it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.65it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.71it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.54it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.61it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.66it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.62it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.77it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.79it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.50it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.42it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.48it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.52it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.56it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.53it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.61it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.75it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.71it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.72it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.71it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.59it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.57it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.61it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.67it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.66it/s]\u001b[A\n",
"{'eval_loss': 0.8559792637825012, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2321, 'eval_samples_per_second': 116.063, 'eval_steps_per_second': 14.508, 'epoch': 0.88}\n",
"\n",
" 70% 1750/2500 [04:49<01:10, 10.61it/s]\n",
"{'loss': 1.4421, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.9}\n",
"{'loss': 1.3835, 'learning_rate': 1.2e-05, 'epoch': 0.95}\n",
"{'loss': 1.325, 'learning_rate': 1e-05, 'epoch': 1.0}\n",
" 80% 2000/2500 [05:12<00:45, 10.89it/s][INFO|trainer.py:2907] 2023-02-14 22:09:43,863 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:09:43,863 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:09:43,863 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.99it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.18it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.14it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.55it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.22it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.01it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.84it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.87it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.46it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.50it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.65it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.72it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.52it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.60it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.40it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.43it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.46it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.51it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.50it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.44it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.49it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.50it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.52it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.53it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:12, 14.50it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:12, 14.33it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.36it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.41it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.37it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.42it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.55it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.57it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.63it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.64it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.57it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.60it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.63it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.57it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.68it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.52it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.52it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.51it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.72it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.76it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.81it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.87it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.90it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.70it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.73it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.82it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.86it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.87it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.91it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.53it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.56it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.57it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.63it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.72it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.70it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.61it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.59it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.53it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.63it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.67it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.76it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.74it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.60it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.47it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.53it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.61it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.66it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
" 80% 2000/2500 [05:29<00:45, 10.89it/s]\n",
" 99% 248/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.62it/s]\u001b[A\n",
"{'eval_loss': 0.8163257241249084, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2395, 'eval_samples_per_second': 116.013, 'eval_steps_per_second': 14.502, 'epoch': 1.0}\n",
"\n",
" 80% 2000/2500 [05:30<00:45, 10.89it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:10:01,104 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:10:01,105 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:10:01,585 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:10:01,586 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:10:01,586 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-2000/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:10:01,623 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-2000/spiece.model\n",
"{'loss': 1.2708, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.05}\n",
"{'loss': 1.3351, 'learning_rate': 6e-06, 'epoch': 1.1}\n",
" 90% 2249/2500 [05:54<00:23, 10.80it/s][INFO|trainer.py:2907] 2023-02-14 22:10:25,736 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:10:25,736 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:10:25,736 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.89it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.90it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 16.04it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.53it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.20it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.99it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.90it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.80it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.78it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.56it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.72it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.53it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.45it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.55it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.57it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.53it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.45it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.50it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.57it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.41it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.43it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.54it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.54it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.48it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.39it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.50it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.49it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.54it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.62it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.59it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.60it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.63it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.66it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.77it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.76it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.82it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:08, 14.79it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.83it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.77it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.80it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.61it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.75it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.72it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.69it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.69it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.77it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.79it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.73it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.73it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.79it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.80it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.70it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.66it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.76it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.79it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.72it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.68it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.31it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.45it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.61it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.60it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 90% 2250/2500 [06:09<00:23, 10.80it/s]\n",
" 88% 220/250 [00:14<00:02, 14.71it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.73it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.84it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.80it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.66it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.68it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.59it/s]\u001b[A\n",
"{'eval_loss': 0.8037287592887878, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2062, 'eval_samples_per_second': 116.237, 'eval_steps_per_second': 14.53, 'epoch': 1.12}\n",
"\n",
" 90% 2250/2500 [06:12<00:23, 10.80it/s]\n",
"{'loss': 1.2308, 'learning_rate': 4.000000000000001e-06, 'epoch': 1.15}\n",
"{'loss': 1.376, 'learning_rate': 2.0000000000000003e-06, 'epoch': 1.2}\n",
"{'loss': 1.2416, 'learning_rate': 0.0, 'epoch': 1.25}\n",
"100% 2500/2500 [06:35<00:00, 10.84it/s][INFO|trainer.py:2907] 2023-02-14 22:11:06,282 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:06,283 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:06,283 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.34it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.78it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.85it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.37it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.91it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.80it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.78it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.67it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.60it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.63it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.67it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.68it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.62it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.67it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.73it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.82it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.79it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.53it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.46it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.47it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.48it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.74it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.68it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.61it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.62it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.56it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.59it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.38it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.33it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.30it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.35it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.40it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:10, 14.40it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:10, 14.36it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.27it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.36it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.34it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.33it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.31it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.35it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:09, 14.41it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.47it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.50it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.56it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 53% 132/250 [00:09<00:08, 14.59it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.76it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.67it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.75it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.59it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.66it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.65it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.71it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 70% 176/250 [00:12<00:05, 14.71it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.56it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.55it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.62it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.63it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.71it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.73it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.60it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.59it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.49it/s]\u001b[A\n",
"100% 2500/2500 [06:49<00:00, 10.84it/s]\n",
" 85% 212/250 [00:14<00:02, 14.53it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.51it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.54it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.56it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.67it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.66it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.78it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.72it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.74it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.65it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.64it/s]\u001b[A\n",
"{'eval_loss': 0.7921838760375977, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2721, 'eval_samples_per_second': 115.794, 'eval_steps_per_second': 14.474, 'epoch': 1.25}\n",
"\n",
"100% 2500/2500 [06:52<00:00, 10.84it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:11:23,556 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:11:23,557 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:11:24,033 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:11:24,034 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:11:24,034 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-2500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:11:24,070 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-2500/spiece.model\n",
"[INFO|trainer.py:1852] 2023-02-14 22:11:24,853 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 22:11:24,854 >> Loading best model from out/emotion/t5_v1_1/checkpoint-500 (score: 1.0).\n",
"{'train_runtime': 414.2608, 'train_samples_per_second': 48.279, 'train_steps_per_second': 6.035, 'train_loss': 3.8232721221923827, 'epoch': 1.25}\n",
"100% 2500/2500 [06:54<00:00, 6.03it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 22:11:25,173 >> Saving model checkpoint to out/emotion/t5_v1_1\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:11:25,174 >> Configuration saved in out/emotion/t5_v1_1/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:11:25,662 >> Model weights saved in out/emotion/t5_v1_1/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:11:25,663 >> tokenizer config file saved in out/emotion/t5_v1_1/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:11:25,663 >> Special tokens file saved in out/emotion/t5_v1_1/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:11:25,703 >> Copy vocab file to out/emotion/t5_v1_1/spiece.model\n",
"***** train metrics *****\n",
" epoch = 1.25\n",
" train_loss = 3.8233\n",
" train_runtime = 0:06:54.26\n",
" train_samples = 16000\n",
" train_samples_per_second = 48.279\n",
" train_steps_per_second = 6.035\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:2907] 2023-02-14 22:11:25,713 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:25,713 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:25,713 >> Batch size = 8\n",
"100% 250/250 [00:17<00:00, 14.50it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.25\n",
" eval_accuracy = 1.0\n",
" eval_bleu = 0.0\n",
" eval_gen_len = 2.0\n",
" eval_loss = 2.1697\n",
" eval_runtime = 0:00:17.31\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 115.494\n",
" eval_steps_per_second = 14.437\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:2907] 2023-02-14 22:11:43,033 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:43,033 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:43,034 >> Batch size = 8\n",
"100% 250/250 [00:17<00:00, 14.58it/s]\n",
"***** predict metrics *****\n",
" predict_accuracy = 1.0\n",
" predict_bleu = 0.0\n",
" predict_gen_len = 2.0\n",
" predict_loss = 2.1029\n",
" predict_runtime = 0:00:17.21\n",
" predict_samples = 2000\n",
" predict_samples_per_second = 116.158\n",
" predict_steps_per_second = 14.52\n",
"[INFO|modelcard.py:444] 2023-02-14 22:12:00,417 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Translation', 'type': 'translation'}, 'metrics': [{'name': 'Bleu', 'type': 'bleu', 'value': 0.0}, {'name': 'Accuracy', 'type': 'accuracy', 'value': 1.0}]}\n"
]
}
],
"source": [
"!python run_translation.py \\\n",
" --cache_dir t5_cache_training \\\n",
" --model_name_or_path \"google/t5-v1_1-small\" \\\n",
" --train_file data/s2s-train.json \\\n",
" --validation_file data/s2s-valid.json \\\n",
" --test_file data/s2s-test.json \\\n",
" --per_device_train_batch_size 8 \\\n",
" --per_device_eval_batch_size 8 \\\n",
" --source_lang \"text\" \\\n",
" --target_lang \"label\" \\\n",
" --source_prefix \"emotion classification\" \\\n",
" --max_source_length 256 \\\n",
" --max_target_length 128 \\\n",
" --generation_max_length 128 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --predict_with_generate \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/t5_v1_1 \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
]
},
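{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of how the fine-tuned checkpoint written to `out/emotion/t5_v1_1` could be queried directly. The checkpoint path and the `emotion classification` prefix come from the command above; the example sentence, the use of `max_new_tokens`, and the exact prefix/text concatenation are assumptions, so treat this as an illustration rather than part of the training pipeline."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: load the fine-tuned seq2seq checkpoint saved by run_translation.py\n",
"# above and generate a label for a single, made-up sentence.\n",
"import torch\n",
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
"\n",
"ckpt = 'out/emotion/t5_v1_1'  # --output_dir used above\n",
"tokenizer = AutoTokenizer.from_pretrained(ckpt)\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(ckpt)\n",
"\n",
"# 'emotion classification' mirrors --source_prefix; whether a separating space is\n",
"# needed depends on how run_translation.py concatenates prefix and text.\n",
"text = 'emotion classification' + ' i feel great about how this project turned out'\n",
"inputs = tokenizer(text, return_tensors='pt')\n",
"with torch.no_grad():\n",
"    output_ids = model.generate(**inputs, max_new_tokens=8)\n",
"# Expected to decode to one of: sadness, joy, love, anger, fear, surprise\n",
"print(tokenizer.decode(output_ids[0], skip_special_tokens=True))"
]
},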
{
"cell_type": "markdown",
"metadata": {
"id": "XyC_7Ov07ICm"
},
"source": [
"# **FLAN T5**"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "nX6LOzsF7ICm"
},
"outputs": [],
"source": [
"from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "EEuIugWA7ICm"
},
"outputs": [],
"source": [
"if torch.cuda.is_available():\n",
" device = 0\n",
"else:\n",
" device = -1"
]
},
{
"cell_type": "code",
"source": [
"def perform_shot_learning(pipeline_type, model_name, test_file):\n",
" class_type = AutoModelForSeq2SeqLM\n",
" model = class_type.from_pretrained(model_name, torch_dtype=torch.float32)\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"\n",
" our_pipeline = pipeline(pipeline_type, model=model, tokenizer=tokenizer, device=device)\n",
"\n",
" correct = 0\n",
"\n",
" labels = \"possible labels: sadness, joy, love, anger, fear, surprise\"\n",
"\n",
" with open(test_file) as f:\n",
" f_lines = f.readlines()\n",
" for line in f_lines:\n",
" ex = json.loads(line)\n",
" prompt = ex['text']\n",
"\n",
" tmp = labels + '\\n' + f'text: {prompt}' + '\\n' + 'label: '\n",
" \n",
" predict = our_pipeline(tmp, do_sample=False)[0]['generated_text']\n",
"\n",
" if predict == ex['label']:\n",
" correct += 1\n",
"\n",
" print(f'Accuracy: {correct/len(f_lines)}')"
],
"metadata": {
"id": "AtDz85GKalzg"
},
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"source": [
"test_ds = 'data/s2s-test.json'"
],
"metadata": {
"id": "q9-4fzxpaoff"
},
"execution_count": 16,
"outputs": []
},
{
"cell_type": "code",
"source": [
"perform_shot_learning('text2text-generation', 'google/flan-t5-large', test_ds)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 219,
"referenced_widgets": [
"18f03144f5194bd2a88064eaae1140f0",
"36b8333766d44ee2aaa8da8ee75975d2",
"f7a9b125cf1346468e428abd689ff800",
"9b9e6161874f41c98d5c5e55d8d4fc86",
"9925a6f17ba14eee96332f0ea1dc88e5",
"dce64adfb8334591a8ce182918ecb4e3",
"9efd8cd2208245aca3f369f0735e2ee1",
"3d05704ffb0040c8b5bfb5c068c3329b",
"9564dcdd10c64072bb09e70def311ff3",
"f406c9b52a274068bd636554558497b2",
"d97be50f8cc64f8680a6cce112863255",
"7d6b88e56dad4dcbb0f1b1720f1ff118",
"eabc78cbdeef40feb36cf90fdbcdfbc7",
"6477d99dffbc4cf39e2c6998f71e37f7",
"d63511a8852942309cabe53720939fcc",
"3096b59f64eb48659a8eedea5a171be4",
"acc58b06f3b54801b10ee872fab39e6e",
"c2bd9c9ddab848529e52adfdc7634044",
"8d7e8c29d7e247f1b55d329d40508526",
"457b70adcab0464c9f990b13f433c635",
"0858fe327ec549b488f6169de1d84654",
"e18a505153c7491f8900142fb1189cd7",
"945026e5e11448b39ab37fb2a0bd963c",
"8c3aa97d58cb4f21b59af6253c952859",
"848ff807a83c4a79a1b3d7d80c29499c",
"a7b1f6722fcd4e90811041b24df0fe7b",
"f815d05091814c39a467cd8f528db504",
"915449ab41d848d39d801b4feb932a4f",
"2937b015455647abb7a524f858a881d2",
"c2b6cda9a8e94f7e97d7fb032b8e2bc5",
"af885a022ad743098e5037e1c8dc760a",
"088ec36aff7f415abfc4fd926fa0f902",
"b1b99d863dc64208afc11416d4936c2c",
"cb9e02be7ec44f6bb6b8771691c114e4",
"f68a247bddf9484e9f7b1666802f4612",
"d8d89ac972084304bff515a16e009452",
"3495b00846ae49acbb0cf3e15edf361e",
"60f6f23e78ce4ee2abf7389ab936c3ac",
"9d428e02c4134510baf179ce9137d90c",
"5298f4cd4e2e404ea66d70c62bcfe439",
"cd9fdc3eb94a4d00b5af6115318dcf45",
"d664c674a977456cad109347c0206d0e",
"17e5dedc0aeb4a1da32113e51158fd74",
"9b70ec9f110f4080a6a26fd12044fe94"
]
},
"id": "7fWzF9PVatgL",
"outputId": "6c37c046-a14c-4cab-e285-fa1ddfeb3241"
},
"execution_count": 17,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)okenizer_config.json: 0%| | 0.00/2.54k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "18f03144f5194bd2a88064eaae1140f0"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)\"spiece.model\";: 0%| | 0.00/792k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "7d6b88e56dad4dcbb0f1b1720f1ff118"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)/main/tokenizer.json: 0%| | 0.00/2.42M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "945026e5e11448b39ab37fb2a0bd963c"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)cial_tokens_map.json: 0%| | 0.00/2.20k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "cb9e02be7ec44f6bb6b8771691c114e4"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.8/dist-packages/transformers/pipelines/base.py:1043: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
" warnings.warn(\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy: 0.647\n"
]
}
]
},
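{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `UserWarning` above points out that the pipeline is called once per example on the GPU. Below is a minimal sketch of a batched variant of the same zero-shot evaluation; the `batched_shot_learning` name, the default `batch_size`, and the handling of the pipeline's return format are assumptions, not part of the original experiment."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch: pass all prompts to the pipeline at once so it can batch\n",
"# generation on the GPU, instead of one call per example.\n",
"# Reuses the `device` variable defined earlier in this section.\n",
"import json\n",
"from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM\n",
"\n",
"def batched_shot_learning(model_name, test_file, batch_size=16):\n",
"    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n",
"    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"    pipe = pipeline('text2text-generation', model=model, tokenizer=tokenizer, device=device)\n",
"\n",
"    labels = 'possible labels: sadness, joy, love, anger, fear, surprise'\n",
"    with open(test_file) as f:\n",
"        examples = [json.loads(line) for line in f]\n",
"    prompts = [labels + '\\n' + f\"text: {ex['text']}\" + '\\n' + 'label: ' for ex in examples]\n",
"\n",
"    # A list input lets the pipeline batch internally.\n",
"    predictions = pipe(prompts, do_sample=False, batch_size=batch_size)\n",
"\n",
"    correct = 0\n",
"    for ex, pred in zip(examples, predictions):\n",
"        # Depending on the transformers version, each result is either a dict\n",
"        # or a single-element list of dicts.\n",
"        generated = pred[0]['generated_text'] if isinstance(pred, list) else pred['generated_text']\n",
"        if generated == ex['label']:\n",
"            correct += 1\n",
"    print(f'Accuracy: {correct/len(examples)}')\n",
"\n",
"# Example call with the same model and test set as above:\n",
"# batched_shot_learning('google/flan-t5-large', test_ds)"
]
},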
{
"cell_type": "code",
"source": [
"!zip -r /content/projekt.zip /content/"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mJfe_hnJ_qVC",
"outputId": "ebdda236-1053-4b29-809d-7be9247edf19"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" adding: content/ (stored 0%)\n",
" adding: content/.config/ (stored 0%)\n",
" adding: content/.config/config_sentinel (stored 0%)\n",
" adding: content/.config/logs/ (stored 0%)\n",
" adding: content/.config/logs/2023.02.10/ (stored 0%)\n",
" adding: content/.config/logs/2023.02.10/14.32.38.026074.log (deflated 58%)\n",
" adding: content/.config/logs/2023.02.10/14.33.38.691407.log (deflated 56%)\n",
" adding: content/.config/logs/2023.02.10/14.33.11.427170.log (deflated 58%)\n",
" adding: content/.config/logs/2023.02.10/14.33.37.863925.log (deflated 57%)\n",
" adding: content/.config/logs/2023.02.10/14.32.12.281772.log (deflated 91%)\n",
" adding: content/.config/logs/2023.02.10/14.33.03.230973.log (deflated 86%)\n",
" adding: content/.config/gce (stored 0%)\n",
" adding: content/.config/.last_survey_prompt.yaml (stored 0%)\n",
" adding: content/.config/configurations/ (stored 0%)\n",
" adding: content/.config/configurations/config_default (deflated 15%)\n",
" adding: content/.config/active_config (stored 0%)\n",
" adding: content/.config/.last_update_check.json (deflated 22%)\n",
" adding: content/.config/.last_opt_in_prompt.yaml (stored 0%)\n",
" adding: content/__pycache__/ (stored 0%)\n",
" adding: content/__pycache__/roberta.cpython-38.pyc (deflated 62%)\n",
" adding: content/__pycache__/gpt2.cpython-38.pyc (deflated 53%)\n",
" adding: content/data/ (stored 0%)\n",
" adding: content/data/.ipynb_checkpoints/ (stored 0%)\n",
" adding: content/data/test.json (deflated 69%)\n",
" adding: content/data/s2s-test.json (deflated 70%)\n",
" adding: content/data/s2s-valid.json (deflated 70%)\n",
" adding: content/data/valid.json (deflated 69%)\n",
" adding: content/data/s2s-train.json (deflated 70%)\n",
" adding: content/data/train.json (deflated 69%)\n",
" adding: content/req.txt (deflated 30%)\n",
" adding: content/.cache_training_roberta/ (stored 0%)\n",
" adding: content/.cache_training_roberta/.cache_training_roberta_json_default-1808ac39383e9432_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/json/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-ff4234a2fb1a9582.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-6bbf8957e5f0cf7b.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-5efe26f1bca5cac0.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21 (deflated 63%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311 (deflated 59%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957 (deflated 47%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/278b7a95739c4392fae9b818bb5343dde20be1b89318f37a6d939e1e1b9e461b (deflated 41%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/refs/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/refs/main (deflated 3%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/tokenizer_config.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/added_tokens.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/special_tokens_map.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json (deflated 47%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json (deflated 59%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt (deflated 53%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json (deflated 63%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin (deflated 41%)\n",
" adding: content/cache_training_t5/ (stored 0%)\n",
" adding: content/cache_training_t5/cache_training_t5_json_default-25a5883a4a222bad_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/4e28ff6ebdf584f5372d9de68867399142435d9a (deflated 48%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/b114c318caf72f6e89ea92e0755c41327a453198 (deflated 82%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/07b81619b82546ab7f30e06c9615c7fca8fe3abd (deflated 44%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/881bdbffc06e471924ecea57f962bc5f8e2a9f21 (deflated 83%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/7c9a3e998a8c74b52484f3a1ccfdcc9767972ee6b34ae7a527cdf6f972a34163 (deflated 53%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/refs/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/refs/main (deflated 5%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer.json (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/added_tokens.json (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json (deflated 44%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json (deflated 82%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model (deflated 48%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json (deflated 83%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin (deflated 53%)\n",
" adding: content/cache_training_t5/json/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bef49b953c77fdf0.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-105206b5fd478147.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-040b968aed3576f7.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-f37cf2f406b18541.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 58%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-b0aef076d30fe2f7.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/run_glue.py (deflated 73%)\n",
" adding: content/run_translation.py (deflated 74%)\n",
" adding: content/roberta_custom_training_cache/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/roberta_custom_training_cache_json_default-01aa9d8252a24a0d_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21 (deflated 63%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311 (deflated 59%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957 (deflated 47%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/278b7a95739c4392fae9b818bb5343dde20be1b89318f37a6d939e1e1b9e461b (deflated 41%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/refs/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/refs/main (deflated 3%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/tokenizer_config.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/added_tokens.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/special_tokens_map.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json (deflated 47%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json (deflated 59%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt (deflated 53%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json (deflated 63%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin (deflated 41%)\n",
" adding: content/gtp_cache_training/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421 (deflated 16%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/1f1d9aaca301414e7f6c9396df506798ff4eb9a6 (deflated 67%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/10c66461e4c109db5a2196bff4bb59be30396ed8 (deflated 50%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/4b988bccc9dc5adacd403c00b4704976196548f8 (deflated 59%)\n",
" adding: content/gtp_cache_training/models--gpt2/refs/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/refs/main (deflated 3%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer_config.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/added_tokens.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/special_tokens_map.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json (deflated 50%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json (deflated 59%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt (deflated 53%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json (deflated 67%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin (deflated 16%)\n",
" adding: content/gtp_cache_training/gtp_cache_training_json_default-01aa9d8252a24a0d_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/t5_cache_training/ (stored 0%)\n",
" adding: content/t5_cache_training/t5_cache_training_json_default-a82ca4164dba097e_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/4e28ff6ebdf584f5372d9de68867399142435d9a (deflated 48%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/b114c318caf72f6e89ea92e0755c41327a453198 (deflated 82%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/07b81619b82546ab7f30e06c9615c7fca8fe3abd (deflated 44%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/881bdbffc06e471924ecea57f962bc5f8e2a9f21 (deflated 83%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/7c9a3e998a8c74b52484f3a1ccfdcc9767972ee6b34ae7a527cdf6f972a34163 (deflated 53%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/refs/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/refs/main (deflated 5%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer.json (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/added_tokens.json (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json (deflated 44%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json (deflated 82%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model (deflated 48%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json (deflated 83%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin (deflated 53%)\n",
" adding: content/t5_cache_training/json/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-988bff0993eee389.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 58%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-fa17416eabe18767.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-c6cebbf9290f7df0.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/out/ (stored 0%)\n",
" adding: content/out/emotion/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/trainer_state.json (deflated 79%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/all_results.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/predict_results_None.txt (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/trainer_state.json (deflated 77%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/train_results.json (deflated 40%)\n",
" adding: content/out/emotion/gpt2_custom/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/eval_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/scheduler.pt (deflated 50%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/runs/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/1676409101.551365/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/1676409101.551365/events.out.tfevents.1676409101.fc0011e45a00.60473.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/events.out.tfevents.1676409101.fc0011e45a00.60473.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/events.out.tfevents.1676407620.fc0011e45a00.53924.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/1676407620.269752/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/1676407620.269752/events.out.tfevents.1676407620.fc0011e45a00.53924.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/events.out.tfevents.1676411802.fc0011e45a00.72811.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/events.out.tfevents.1676412248.fc0011e45a00.72811.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/1676411802.9557116/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/1676411802.9557116/events.out.tfevents.1676411802.fc0011e45a00.72811.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/events.out.tfevents.1676409199.fc0011e45a00.60936.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/1676409199.1303008/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/1676409199.1303008/events.out.tfevents.1676409199.fc0011e45a00.60936.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/1676408364.7675455/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/1676408364.7675455/events.out.tfevents.1676408364.fc0011e45a00.57251.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/events.out.tfevents.1676408364.fc0011e45a00.57251.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/events.out.tfevents.1676409294.fc0011e45a00.61381.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/1676409294.483754/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/1676409294.483754/events.out.tfevents.1676409294.fc0011e45a00.61381.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/events.out.tfevents.1676407574.fc0011e45a00.53675.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/1676407574.5370467/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/1676407574.5370467/events.out.tfevents.1676407574.fc0011e45a00.53675.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/1676409363.3658211/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/1676409363.3658211/events.out.tfevents.1676409363.fc0011e45a00.61724.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/events.out.tfevents.1676409363.fc0011e45a00.61724.0 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/events.out.tfevents.1676407449.fc0011e45a00.53094.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/1676407449.3215246/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/1676407449.3215246/events.out.tfevents.1676407449.fc0011e45a00.53094.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/events.out.tfevents.1676408949.fc0011e45a00.59782.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/1676408949.6798263/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/1676408949.6798263/events.out.tfevents.1676408949.fc0011e45a00.59782.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/events.out.tfevents.1676410915.fc0011e45a00.68705.0 (deflated 57%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/1676410915.0364006/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/1676410915.0364006/events.out.tfevents.1676410915.fc0011e45a00.68705.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/events.out.tfevents.1676407741.fc0011e45a00.54546.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/1676407741.3566854/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/1676407741.3566854/events.out.tfevents.1676407741.fc0011e45a00.54546.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/events.out.tfevents.1676407672.fc0011e45a00.54203.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/1676407672.9366086/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/1676407672.9366086/events.out.tfevents.1676407672.fc0011e45a00.54203.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/events.out.tfevents.1676408205.fc0011e45a00.56536.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/1676408205.8404686/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/1676408205.8404686/events.out.tfevents.1676408205.fc0011e45a00.56536.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/1676408153.0722597/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/1676408153.0722597/events.out.tfevents.1676408153.fc0011e45a00.56263.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/events.out.tfevents.1676408153.fc0011e45a00.56263.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/trainer_state.json (deflated 75%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/README.md (deflated 54%)\n",
" adding: content/out/emotion/gpt2_custom/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/trainer_state.json (deflated 67%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/optimizer.pt (deflated 31%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/all_results.json (deflated 55%)\n",
" adding: content/out/emotion/gpt2/predict_results_None.txt (deflated 62%)\n",
" adding: content/out/emotion/gpt2/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/trainer_state.json (deflated 81%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/trainer_state.json (deflated 78%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/train_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/eval_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/scheduler.pt (deflated 50%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/trainer_state.json (deflated 81%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/runs/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/events.out.tfevents.1676411778.fc0011e45a00.70872.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/1676411348.7268953/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/1676411348.7268953/events.out.tfevents.1676411348.fc0011e45a00.70872.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/events.out.tfevents.1676411348.fc0011e45a00.70872.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/events.out.tfevents.1676407272.fc0011e45a00.50524.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/events.out.tfevents.1676406850.fc0011e45a00.50524.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/1676406850.2390406/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/1676406850.2390406/events.out.tfevents.1676406850.fc0011e45a00.50524.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/events.out.tfevents.1676403875.fc0011e45a00.37469.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/1676403875.9091897/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/1676403875.9091897/events.out.tfevents.1676403875.fc0011e45a00.37469.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/trainer_state.json (deflated 75%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/README.md (deflated 54%)\n",
" adding: content/out/emotion/gpt2/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/trainer_state.json (deflated 67%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/pytorch_model.bin\n",
"\n",
"\n",
"zip error: Interrupted (aborting)\n"
]
}
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
},
"colab": {
"provenance": []
},
"accelerator": "GPU",
"gpuClass": "premium",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"18f03144f5194bd2a88064eaae1140f0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_36b8333766d44ee2aaa8da8ee75975d2",
"IPY_MODEL_f7a9b125cf1346468e428abd689ff800",
"IPY_MODEL_9b9e6161874f41c98d5c5e55d8d4fc86"
],
"layout": "IPY_MODEL_9925a6f17ba14eee96332f0ea1dc88e5"
}
},
"36b8333766d44ee2aaa8da8ee75975d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_dce64adfb8334591a8ce182918ecb4e3",
"placeholder": "",
"style": "IPY_MODEL_9efd8cd2208245aca3f369f0735e2ee1",
"value": "Downloading (…)okenizer_config.json: 100%"
}
},
"f7a9b125cf1346468e428abd689ff800": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3d05704ffb0040c8b5bfb5c068c3329b",
"max": 2539,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_9564dcdd10c64072bb09e70def311ff3",
"value": 2539
}
},
"9b9e6161874f41c98d5c5e55d8d4fc86": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f406c9b52a274068bd636554558497b2",
"placeholder": "",
"style": "IPY_MODEL_d97be50f8cc64f8680a6cce112863255",
"value": " 2.54k/2.54k [00:00&lt;00:00, 125kB/s]"
}
},
"9925a6f17ba14eee96332f0ea1dc88e5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"dce64adfb8334591a8ce182918ecb4e3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9efd8cd2208245aca3f369f0735e2ee1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3d05704ffb0040c8b5bfb5c068c3329b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9564dcdd10c64072bb09e70def311ff3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"f406c9b52a274068bd636554558497b2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d97be50f8cc64f8680a6cce112863255": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7d6b88e56dad4dcbb0f1b1720f1ff118": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_eabc78cbdeef40feb36cf90fdbcdfbc7",
"IPY_MODEL_6477d99dffbc4cf39e2c6998f71e37f7",
"IPY_MODEL_d63511a8852942309cabe53720939fcc"
],
"layout": "IPY_MODEL_3096b59f64eb48659a8eedea5a171be4"
}
},
"eabc78cbdeef40feb36cf90fdbcdfbc7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_acc58b06f3b54801b10ee872fab39e6e",
"placeholder": "",
"style": "IPY_MODEL_c2bd9c9ddab848529e52adfdc7634044",
"value": "Downloading (…)&quot;spiece.model&quot;;: 100%"
}
},
"6477d99dffbc4cf39e2c6998f71e37f7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8d7e8c29d7e247f1b55d329d40508526",
"max": 791656,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_457b70adcab0464c9f990b13f433c635",
"value": 791656
}
},
"d63511a8852942309cabe53720939fcc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0858fe327ec549b488f6169de1d84654",
"placeholder": "",
"style": "IPY_MODEL_e18a505153c7491f8900142fb1189cd7",
"value": " 792k/792k [00:00&lt;00:00, 8.08MB/s]"
}
},
"3096b59f64eb48659a8eedea5a171be4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"acc58b06f3b54801b10ee872fab39e6e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c2bd9c9ddab848529e52adfdc7634044": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"8d7e8c29d7e247f1b55d329d40508526": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"457b70adcab0464c9f990b13f433c635": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"0858fe327ec549b488f6169de1d84654": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e18a505153c7491f8900142fb1189cd7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"945026e5e11448b39ab37fb2a0bd963c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_8c3aa97d58cb4f21b59af6253c952859",
"IPY_MODEL_848ff807a83c4a79a1b3d7d80c29499c",
"IPY_MODEL_a7b1f6722fcd4e90811041b24df0fe7b"
],
"layout": "IPY_MODEL_f815d05091814c39a467cd8f528db504"
}
},
"8c3aa97d58cb4f21b59af6253c952859": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_915449ab41d848d39d801b4feb932a4f",
"placeholder": "",
"style": "IPY_MODEL_2937b015455647abb7a524f858a881d2",
"value": "Downloading (…)/main/tokenizer.json: 100%"
}
},
"848ff807a83c4a79a1b3d7d80c29499c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c2b6cda9a8e94f7e97d7fb032b8e2bc5",
"max": 2424064,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_af885a022ad743098e5037e1c8dc760a",
"value": 2424064
}
},
"a7b1f6722fcd4e90811041b24df0fe7b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_088ec36aff7f415abfc4fd926fa0f902",
"placeholder": "",
"style": "IPY_MODEL_b1b99d863dc64208afc11416d4936c2c",
"value": " 2.42M/2.42M [00:00&lt;00:00, 18.4MB/s]"
}
},
"f815d05091814c39a467cd8f528db504": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"915449ab41d848d39d801b4feb932a4f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2937b015455647abb7a524f858a881d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"c2b6cda9a8e94f7e97d7fb032b8e2bc5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"af885a022ad743098e5037e1c8dc760a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"088ec36aff7f415abfc4fd926fa0f902": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b1b99d863dc64208afc11416d4936c2c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cb9e02be7ec44f6bb6b8771691c114e4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_f68a247bddf9484e9f7b1666802f4612",
"IPY_MODEL_d8d89ac972084304bff515a16e009452",
"IPY_MODEL_3495b00846ae49acbb0cf3e15edf361e"
],
"layout": "IPY_MODEL_60f6f23e78ce4ee2abf7389ab936c3ac"
}
},
"f68a247bddf9484e9f7b1666802f4612": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9d428e02c4134510baf179ce9137d90c",
"placeholder": "",
"style": "IPY_MODEL_5298f4cd4e2e404ea66d70c62bcfe439",
"value": "Downloading (…)cial_tokens_map.json: 100%"
}
},
"d8d89ac972084304bff515a16e009452": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cd9fdc3eb94a4d00b5af6115318dcf45",
"max": 2201,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d664c674a977456cad109347c0206d0e",
"value": 2201
}
},
"3495b00846ae49acbb0cf3e15edf361e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_17e5dedc0aeb4a1da32113e51158fd74",
"placeholder": "",
"style": "IPY_MODEL_9b70ec9f110f4080a6a26fd12044fe94",
"value": " 2.20k/2.20k [00:00&lt;00:00, 160kB/s]"
}
},
"60f6f23e78ce4ee2abf7389ab936c3ac": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9d428e02c4134510baf179ce9137d90c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5298f4cd4e2e404ea66d70c62bcfe439": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cd9fdc3eb94a4d00b5af6115318dcf45": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d664c674a977456cad109347c0206d0e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"17e5dedc0aeb4a1da32113e51158fd74": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9b70ec9f110f4080a6a26fd12044fe94": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}