UG-final/all_models.ipynb
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "svk2qSrl7ICc"
},
"source": [
"# **Deep Learning - Project**\n",
"This project uses the [emotion](https://huggingface.co/datasets/emotion) dataset, which contains short posts labeled with specific emotions (see the short loading sketch a few cells below).\n",
"\n",
"<br>\n",
"\n",
"Labels:\n",
"- 0 - sadness\n",
"- 1 - joy\n",
"- 2 - love\n",
"- 3 - anger\n",
"- 4 - fear\n",
"- 5 - surprise"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "wJ30OIAM7ICf"
},
"source": [
"### **REQUIREMENTS**"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XkE5ENXV7ICf",
"outputId": "68ec24ee-8dcd-48b7-c0ce-3d18c1b9bcd6"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.8/dist-packages (4.23.1)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.2.1)\n",
"Requirement already satisfied: accelerate in /usr/local/lib/python3.8/dist-packages (0.16.0)\n",
"Requirement already satisfied: evaluate in /usr/local/lib/python3.8/dist-packages (0.4.0)\n",
"Requirement already satisfied: datasets in /usr/local/lib/python3.8/dist-packages (2.9.0)\n",
"Requirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (1.13.1)\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.8/dist-packages (0.1.97)\n",
"Requirement already satisfied: torchvision in /usr/local/lib/python3.8/dist-packages (0.14.1+cu116)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers) (4.64.1)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (1.21.6)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.10.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.12.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.13.2)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers) (2.25.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (23.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n",
"Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n",
"Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.8/dist-packages (from accelerate) (5.4.8)\n",
"Requirement already satisfied: dill in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.3.6)\n",
"Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.18.0)\n",
"Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.8/dist-packages (from evaluate) (2023.1.0)\n",
"Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from evaluate) (3.2.0)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from evaluate) (1.3.5)\n",
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.70.14)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
"Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /usr/local/lib/python3.8/dist-packages (from torch) (11.10.3.66)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /usr/local/lib/python3.8/dist-packages (from torch) (11.7.99)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch) (4.4.0)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /usr/local/lib/python3.8/dist-packages (from torch) (11.7.99)\n",
"Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /usr/local/lib/python3.8/dist-packages (from torch) (8.5.0.96)\n",
"Requirement already satisfied: wheel in /usr/local/lib/python3.8/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (0.38.4)\n",
"Requirement already satisfied: setuptools in /usr/local/lib/python3.8/dist-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (57.4.0)\n",
"Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.8/dist-packages (from torchvision) (7.1.2)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2022.12.7)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (1.26.14)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (4.0.0)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2.8.2)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2022.7.1)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->evaluate) (1.15.0)\n"
]
}
],
"source": [
"!pip3 install transformers scikit-learn accelerate evaluate datasets torch sentencepiece torchvision"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "MrV5G1gW7ICg"
},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"from pathlib import Path\n",
"from typing import Dict, List\n",
"from datasets import load_dataset\n",
"import torch\n",
"import pandas as pd\n",
"\n",
"os.environ['TOKENIZERS_PARALLELISM'] = 'true'"
]
},
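{
"cell_type": "markdown",
"metadata": {},
"source": [
"*A minimal sketch: loading the `emotion` dataset with the standard `datasets` API and checking the label id -> name mapping listed at the top of the notebook.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: inspect the emotion dataset and its label id -> name mapping.\n",
"from datasets import load_dataset  # already imported above; repeated for self-containment\n",
"\n",
"emotion = load_dataset(\"emotion\")\n",
"label_names = emotion[\"train\"].features[\"label\"].names\n",
"print(label_names)          # ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']\n",
"print(emotion[\"train\"][0])  # {'text': 'i didnt feel humiliated', 'label': 0}"
]
},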
{
"cell_type": "markdown",
"metadata": {
"id": "Y107u4JG7ICh"
},
"source": [
"### **DATA PREP**"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PmgAAQFV7ICh",
"outputId": "e6f4f065-4d0d-4102-d96a-c5ca791dd113"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"No config specified, defaulting to: emotion/split\n",
"Found cached dataset emotion (/root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)\n",
"\r 0% 0/3 [00:00<?, ?it/s]\r100% 3/3 [00:00<00:00, 182.77it/s]\n",
"Saving into: data/train.json\n",
"Saving into: data/s2s-train.json\n",
"Saving into: data/valid.json\n",
"Saving into: data/s2s-valid.json\n",
"Saving into: data/test.json\n",
"Saving into: data/s2s-test.json\n"
]
}
],
"source": [
"!mkdir -p data\n",
"!python data_prep.py"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Fv0h2-MW7ICh",
"outputId": "ab7744f0-38e1-4415-f9e0-dbb182583e83"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\"label\": 0, \"text\": \"i didnt feel humiliated\"}\n",
"{\"label\": 0, \"text\": \"i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake\"}\n",
"{\"label\": 3, \"text\": \"im grabbing a minute to post i feel greedy wrong\"}\n",
"{\"label\": 2, \"text\": \"i am ever feeling nostalgic about the fireplace i will know that it is still on the property\"}\n",
"{\"label\": 3, \"text\": \"i am feeling grouchy\"}\n",
"{\"label\": 0, \"text\": \"ive been feeling a little burdened lately wasnt sure why that was\"}\n",
"{\"label\": 5, \"text\": \"ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny\"}\n",
"{\"label\": 4, \"text\": \"i feel as confused about life as a teenager or as jaded as a year old man\"}\n",
"{\"label\": 1, \"text\": \"i have been with petronas for years i feel that petronas has performed well and made a huge profit\"}\n",
"{\"label\": 2, \"text\": \"i feel romantic too\"}\n"
]
}
],
"source": [
"!head data/train.json"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "6XHKYEod7ICi",
"outputId": "75b8480e-159a-4968-b0cc-3605680f7410"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\"label\": \"sadness\", \"text\": \"i didnt feel humiliated\"}\n",
"{\"label\": \"sadness\", \"text\": \"i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake\"}\n",
"{\"label\": \"anger\", \"text\": \"im grabbing a minute to post i feel greedy wrong\"}\n",
"{\"label\": \"love\", \"text\": \"i am ever feeling nostalgic about the fireplace i will know that it is still on the property\"}\n",
"{\"label\": \"anger\", \"text\": \"i am feeling grouchy\"}\n",
"{\"label\": \"sadness\", \"text\": \"ive been feeling a little burdened lately wasnt sure why that was\"}\n",
"{\"label\": \"surprise\", \"text\": \"ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny\"}\n",
"{\"label\": \"fear\", \"text\": \"i feel as confused about life as a teenager or as jaded as a year old man\"}\n",
"{\"label\": \"joy\", \"text\": \"i have been with petronas for years i feel that petronas has performed well and made a huge profit\"}\n",
"{\"label\": \"love\", \"text\": \"i feel romantic too\"}\n"
]
}
],
"source": [
"!head data/s2s-train.json"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BtTwN0yz7ICj",
"outputId": "5818cd0b-56eb-4a0f-cada-cf89d03f8d9c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" 2000 data/s2s-test.json\n",
" 16000 data/s2s-train.json\n",
" 2000 data/s2s-valid.json\n",
" 2000 data/test.json\n",
" 16000 data/train.json\n",
" 2000 data/valid.json\n",
" 40000 total\n"
]
}
],
"source": [
"!wc -l data/*"
]
},
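{
"cell_type": "markdown",
"metadata": {},
"source": [
"*The files counted above are plain JSON lines. Below is a hedged sketch of this conversion step (the actual `data_prep.py` is a separate script and may differ): integer labels go to `data/*.json`, label names to `data/s2s-*.json`, matching the `head` outputs above.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hedged sketch of the data preparation step (not the original data_prep.py).\n",
"# Writes one JSON object per line: integer labels for *.json, label names for s2s-*.json.\n",
"import json\n",
"from pathlib import Path\n",
"from datasets import load_dataset\n",
"\n",
"emotion = load_dataset(\"emotion\")\n",
"names = emotion[\"train\"].features[\"label\"].names\n",
"Path(\"data\").mkdir(exist_ok=True)\n",
"for split, stem in [(\"train\", \"train\"), (\"validation\", \"valid\"), (\"test\", \"test\")]:\n",
"    with open(f\"data/{stem}.json\", \"w\") as f_num, open(f\"data/s2s-{stem}.json\", \"w\") as f_txt:\n",
"        for ex in emotion[split]:\n",
"            f_num.write(json.dumps({\"label\": ex[\"label\"], \"text\": ex[\"text\"]}) + \"\\n\")\n",
"            f_txt.write(json.dumps({\"label\": names[ex[\"label\"]], \"text\": ex[\"text\"]}) + \"\\n\")"
]
},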
{
"cell_type": "markdown",
"metadata": {
"id": "jaX7Iyck7ICk"
},
"source": [
"## **ROBERTA**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "bPBy_20B7ICk"
},
"source": [
"- full data\n",
"- model `roberta-base`\n",
"- sequence length: 128\n",
"- training epochs: 1"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "C5TetFI_7ICk",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b83e8452-3eb2-4230-f19e-55fe8a830f4e"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:44:57.299984: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:44:57.452345: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:44:58.236913: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:44:58.237017: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:44:58.237058: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=None,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/roberta/runs/Feb14_21-45-00_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
"metric_for_best_model=None,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/roberta,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/roberta,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 11491.24it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 1882.54it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 573.49it/s]\n",
"Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 83.8kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:01,575 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:01,576 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:45:01,670 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:01,762 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:01,763 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 9.36MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.95MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.7MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,975 >> loading file vocab.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file merges.txt from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file tokenizer.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:45:02,976 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:45:02,976 >> loading configuration file config.json from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:45:02,977 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"INFO:__main__:Using implementation from class: AutoModelForSequenceClassification\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 501M/501M [00:04<00:00, 105MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:45:08,072 >> loading weights file pytorch_model.bin from cache at roberta_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n",
"[WARNING|modeling_utils.py:2596] 2023-02-14 21:45:09,415 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight']\n",
"- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:45:09,415 >> Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.dense.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('roberta.encoder.layer.0.attention.self.query.weight', False), ('roberta.encoder.layer.0.attention.self.query.bias', False), ('roberta.encoder.layer.0.attention.self.key.weight', False), ('roberta.encoder.layer.0.attention.self.key.bias', False), ('roberta.encoder.layer.0.attention.self.value.weight', False), ('roberta.encoder.layer.0.attention.self.value.bias', False), ('roberta.encoder.layer.0.attention.output.dense.weight', False), ('roberta.encoder.layer.0.attention.output.dense.bias', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.0.intermediate.dense.weight', False), ('roberta.encoder.layer.0.intermediate.dense.bias', False), ('roberta.encoder.layer.0.output.dense.weight', False), ('roberta.encoder.layer.0.output.dense.bias', False), ('roberta.encoder.layer.0.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.attention.self.query.weight', False), ('roberta.encoder.layer.2.attention.self.query.bias', False), ('roberta.encoder.layer.2.attention.self.key.weight', False), ('roberta.encoder.layer.2.attention.self.key.bias', False), ('roberta.encoder.layer.2.attention.self.value.weight', False), ('roberta.encoder.layer.2.attention.self.value.bias', False), ('roberta.encoder.layer.2.attention.output.dense.weight', False), ('roberta.encoder.layer.2.attention.output.dense.bias', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.intermediate.dense.weight', False), ('roberta.encoder.layer.2.intermediate.dense.bias', False), ('roberta.encoder.layer.2.output.dense.weight', False), ('roberta.encoder.layer.2.output.dense.bias', False), ('roberta.encoder.layer.2.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.attention.self.query.weight', False), ('roberta.encoder.layer.4.attention.self.query.bias', False), ('roberta.encoder.layer.4.attention.self.key.weight', False), ('roberta.encoder.layer.4.attention.self.key.bias', False), ('roberta.encoder.layer.4.attention.self.value.weight', False), ('roberta.encoder.layer.4.attention.self.value.bias', False), ('roberta.encoder.layer.4.attention.output.dense.weight', False), ('roberta.encoder.layer.4.attention.output.dense.bias', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.intermediate.dense.weight', False), ('roberta.encoder.layer.4.intermediate.dense.bias', False), ('roberta.encoder.layer.4.output.dense.weight', False), ('roberta.encoder.layer.4.output.dense.bias', False), ('roberta.encoder.layer.4.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.attention.self.query.weight', False), ('roberta.encoder.layer.6.attention.self.query.bias', False), ('roberta.encoder.layer.6.attention.self.key.weight', False), ('roberta.encoder.layer.6.attention.self.key.bias', False), ('roberta.encoder.layer.6.attention.self.value.weight', False), ('roberta.encoder.layer.6.attention.self.value.bias', False), ('roberta.encoder.layer.6.attention.output.dense.weight', False), ('roberta.encoder.layer.6.attention.output.dense.bias', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.weight', False), 
('roberta.encoder.layer.6.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.intermediate.dense.weight', False), ('roberta.encoder.layer.6.intermediate.dense.bias', False), ('roberta.encoder.layer.6.output.dense.weight', False), ('roberta.encoder.layer.6.output.dense.bias', False), ('roberta.encoder.layer.6.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.attention.self.query.weight', False), ('roberta.encoder.layer.8.attention.self.query.bi
"\n",
"\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 20.66ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.58ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.81ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [0, 118, 109, 465, 92, 964, 939, 475, 164, 7, 860, 1823, 543, 7, 146, 106, 1095, 8, 114, 939, 2845, 14, 939, 218, 326, 236, 7, 619, 2581, 456, 8, 95, 3068, 66, 5, 94, 76, 9, 334, 15, 127, 308, 939, 475, 164, 7, 33, 7, 860, 1823, 543, 45, 7, 575, 99, 82, 206, 9, 162, 145, 10, 784, 9604, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [0, 118, 553, 106, 7, 1962, 162, 11, 2351, 10, 232, 147, 70, 76, 793, 1972, 115, 1733, 62, 2157, 7917, 8, 2247, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [0, 118, 619, 77, 47, 32, 10, 10837, 621, 47, 5696, 97, 10837, 82, 88, 110, 301, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:725] 2023-02-14 21:45:13,102 >> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:45:13,109 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:45:13,109 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:45:13,109 >> Num Epochs = 1\n",
"[INFO|trainer.py:1610] 2023-02-14 21:45:13,109 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:45:13,109 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:45:13,109 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:45:13,109 >> Total optimization steps = 667\n",
"{'loss': 0.8083, 'learning_rate': 5.0074962518740634e-06, 'epoch': 0.75}\n",
" 75% 500/667 [00:58<00:19, 8.76it/s][INFO|trainer.py:2656] 2023-02-14 21:46:11,148 >> Saving model checkpoint to out/emotion/roberta/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:46:11,149 >> Configuration saved in out/emotion/roberta/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:46:12,047 >> Model weights saved in out/emotion/roberta/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:46:12,048 >> tokenizer config file saved in out/emotion/roberta/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:46:12,048 >> Special tokens file saved in out/emotion/roberta/checkpoint-500/special_tokens_map.json\n",
"100% 666/667 [01:19<00:00, 8.78it/s][INFO|trainer.py:1852] 2023-02-14 21:46:32,443 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'train_runtime': 79.3341, 'train_samples_per_second': 201.679, 'train_steps_per_second': 8.407, 'train_loss': 0.7161429089227359, 'epoch': 1.0}\n",
"100% 667/667 [01:19<00:00, 8.41it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:46:32,445 >> Saving model checkpoint to out/emotion/roberta\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:46:32,446 >> Configuration saved in out/emotion/roberta/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:46:33,422 >> Model weights saved in out/emotion/roberta/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:46:33,422 >> tokenizer config file saved in out/emotion/roberta/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:46:33,423 >> Special tokens file saved in out/emotion/roberta/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 1.0\n",
" train_loss = 0.7161\n",
" train_runtime = 0:01:19.33\n",
" train_samples = 16000\n",
" train_samples_per_second = 201.679\n",
" train_steps_per_second = 8.407\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:46:33,524 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:46:33,526 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:46:33,526 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:46:33,526 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.66it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.0\n",
" eval_accuracy = 0.889\n",
" eval_loss = 0.3302\n",
" eval_runtime = 0:00:03.59\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 556.411\n",
" eval_steps_per_second = 23.369\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:46:37,124 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:46:37,125 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:46:37,125 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:46:37,125 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.68it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:46:40,840 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.8889999985694885}]}\n"
]
}
],
"source": [
"!python run_glue.py \\\n",
" --cache_dir roberta_training_cache \\\n",
" --model_name_or_path roberta-base \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/roberta \\\n",
" --overwrite_output_dir"
]
},
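{
"cell_type": "markdown",
"metadata": {},
"source": [
"*The run above saved a standard Hugging Face checkpoint to `out/emotion/roberta` (config, weights, tokenizer). Below is a minimal inference sketch; mapping `LABEL_0`..`LABEL_5` back to emotion names assumes the dataset label order (0 = sadness ... 5 = surprise).*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: reload the fine-tuned checkpoint and classify one sentence.\n",
"# Assumption: LABEL_0..LABEL_5 follow the dataset order (sadness, joy, love, anger, fear, surprise).\n",
"import torch\n",
"from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
"\n",
"names = [\"sadness\", \"joy\", \"love\", \"anger\", \"fear\", \"surprise\"]\n",
"tokenizer = AutoTokenizer.from_pretrained(\"out/emotion/roberta\")\n",
"model = AutoModelForSequenceClassification.from_pretrained(\"out/emotion/roberta\").eval()\n",
"\n",
"inputs = tokenizer(\"i feel romantic too\", return_tensors=\"pt\", truncation=True, max_length=128)\n",
"with torch.no_grad():\n",
"    logits = model(**inputs).logits\n",
"print(names[int(logits.argmax(dim=-1))])  # expected: love"
]
},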
{
"cell_type": "markdown",
"source": [
"- full data\n",
"- sequence length: 128\n",
"- LeakyReLU instead of ReLU\n",
"- every other layer frozen\n",
"- custom head (a rough sketch follows in the next cell)"
],
"metadata": {
"id": "b1iFFLFAf9PC"
}
},
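{
"cell_type": "markdown",
"metadata": {},
"source": [
"*A rough sketch of the custom variant, inferred from the log below (`RobertaForSequenceClassificationCustomAlternative` with head layers `dense_1_input`, `dense_1_hidden`, `dense_2`, `out_proj`, LeakyReLU activations, and every other encoder layer frozen); it is not the actual `roberta_custom` code, and layer widths and dropout are assumptions.*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative sketch of the custom variant (assumed, not the exact roberta_custom code):\n",
"# LeakyReLU classification head + every other encoder layer frozen.\n",
"import torch.nn as nn\n",
"from transformers import AutoModelForSequenceClassification\n",
"\n",
"class CustomHead(nn.Module):\n",
"    \"\"\"Head with the layer names seen in the training log; widths are assumed.\"\"\"\n",
"    def __init__(self, hidden_size=768, num_labels=6, dropout=0.1):\n",
"        super().__init__()\n",
"        self.dense_1_input = nn.Linear(hidden_size, hidden_size)\n",
"        self.dense_1_hidden = nn.Linear(hidden_size, hidden_size)\n",
"        self.dense_2 = nn.Linear(hidden_size, hidden_size)\n",
"        self.out_proj = nn.Linear(hidden_size, num_labels)\n",
"        self.act = nn.LeakyReLU()\n",
"        self.dropout = nn.Dropout(dropout)\n",
"\n",
"    def forward(self, features, **kwargs):\n",
"        x = features[:, 0, :]  # embedding of the <s> token\n",
"        x = self.act(self.dense_1_input(self.dropout(x)))\n",
"        x = self.act(self.dense_1_hidden(self.dropout(x)))\n",
"        x = self.act(self.dense_2(self.dropout(x)))\n",
"        return self.out_proj(self.dropout(x))\n",
"\n",
"model = AutoModelForSequenceClassification.from_pretrained(\"roberta-base\", num_labels=6)\n",
"model.classifier = CustomHead()\n",
"# Freeze every other encoder layer (0, 2, 4, ...), as in the frozen-layers log below.\n",
"for i, layer in enumerate(model.roberta.encoder.layer):\n",
"    if i % 2 == 0:\n",
"        for p in layer.parameters():\n",
"            p.requires_grad = False"
]
},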
{
"cell_type": "code",
"source": [
"!python run_glue.py \\\n",
" --cache_dir roberta_custom_training_cache \\\n",
" --model_name_or_path roberta-base \\\n",
" --custom_model roberta_custom \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/roberta_custom \\\n",
" --overwrite_output_dir"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "WzRBwNKqkDAk",
"outputId": "8d042117-3af6-4041-d1a5-d70024df24fb"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:47:02.722049: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:47:02.876002: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:47:03.659342: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:47:03.659451: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:47:03.659470: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=None,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/roberta_custom/runs/Feb14_21-47-05_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
"metric_for_best_model=None,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/roberta_custom,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/roberta_custom,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 14463.12it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2119.76it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 657.14it/s]\n",
"Downloading (…)lve/main/config.json: 100% 481/481 [00:00<00:00, 88.4kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:06,896 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:06,897 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:47:06,989 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:07,079 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:07,080 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 899k/899k [00:00<00:00, 9.35MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.91MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 10.3MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file vocab.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file merges.txt from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file tokenizer.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:47:08,305 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:47:08,306 >> loading configuration file config.json from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:47:08,306 >> Model config RobertaConfig {\n",
" \"_name_or_path\": \"roberta-base\",\n",
" \"architectures\": [\n",
" \"RobertaForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 514,\n",
" \"model_type\": \"roberta\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50265\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: False\n",
"INFO:__main__:Using implementation from class: RobertaForSequenceClassificationCustomAlternative\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 501M/501M [00:04<00:00, 106MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:47:13,300 >> loading weights file pytorch_model.bin from cache at roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin\n",
"[WARNING|modeling_utils.py:2596] 2023-02-14 21:47:15,772 >> Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassificationCustomAlternative: ['roberta.pooler.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight']\n",
"- This IS expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing RobertaForSequenceClassificationCustomAlternative from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:47:15,772 >> Some weights of RobertaForSequenceClassificationCustomAlternative were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense_1_input.weight', 'classifier.dense_2.weight', 'classifier.out_proj.bias', 'classifier.dense_2.bias', 'classifier.dense_1_input.bias', 'classifier.dense_1_hidden.weight', 'classifier.dense_1_hidden.bias', 'classifier.out_proj.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('roberta.encoder.layer.0.attention.self.query.weight', False), ('roberta.encoder.layer.0.attention.self.query.bias', False), ('roberta.encoder.layer.0.attention.self.key.weight', False), ('roberta.encoder.layer.0.attention.self.key.bias', False), ('roberta.encoder.layer.0.attention.self.value.weight', False), ('roberta.encoder.layer.0.attention.self.value.bias', False), ('roberta.encoder.layer.0.attention.output.dense.weight', False), ('roberta.encoder.layer.0.attention.output.dense.bias', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.0.intermediate.dense.weight', False), ('roberta.encoder.layer.0.intermediate.dense.bias', False), ('roberta.encoder.layer.0.output.dense.weight', False), ('roberta.encoder.layer.0.output.dense.bias', False), ('roberta.encoder.layer.0.output.LayerNorm.weight', False), ('roberta.encoder.layer.0.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.attention.self.query.weight', False), ('roberta.encoder.layer.2.attention.self.query.bias', False), ('roberta.encoder.layer.2.attention.self.key.weight', False), ('roberta.encoder.layer.2.attention.self.key.bias', False), ('roberta.encoder.layer.2.attention.self.value.weight', False), ('roberta.encoder.layer.2.attention.self.value.bias', False), ('roberta.encoder.layer.2.attention.output.dense.weight', False), ('roberta.encoder.layer.2.attention.output.dense.bias', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.2.intermediate.dense.weight', False), ('roberta.encoder.layer.2.intermediate.dense.bias', False), ('roberta.encoder.layer.2.output.dense.weight', False), ('roberta.encoder.layer.2.output.dense.bias', False), ('roberta.encoder.layer.2.output.LayerNorm.weight', False), ('roberta.encoder.layer.2.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.attention.self.query.weight', False), ('roberta.encoder.layer.4.attention.self.query.bias', False), ('roberta.encoder.layer.4.attention.self.key.weight', False), ('roberta.encoder.layer.4.attention.self.key.bias', False), ('roberta.encoder.layer.4.attention.self.value.weight', False), ('roberta.encoder.layer.4.attention.self.value.bias', False), ('roberta.encoder.layer.4.attention.output.dense.weight', False), ('roberta.encoder.layer.4.attention.output.dense.bias', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.4.intermediate.dense.weight', False), ('roberta.encoder.layer.4.intermediate.dense.bias', False), ('roberta.encoder.layer.4.output.dense.weight', False), ('roberta.encoder.layer.4.output.dense.bias', False), ('roberta.encoder.layer.4.output.LayerNorm.weight', False), ('roberta.encoder.layer.4.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.attention.self.query.weight', False), ('roberta.encoder.layer.6.attention.self.query.bias', False), ('roberta.encoder.layer.6.attention.self.key.weight', False), ('roberta.encoder.layer.6.attention.self.key.bias', False), ('roberta.encoder.layer.6.attention.self.value.weight', False), ('roberta.encoder.layer.6.attention.self.value.bias', False), ('roberta.encoder.layer.6.attention.output.dense.weight', False), ('roberta.encoder.layer.6.attention.output.dense.bias', False), ('roberta.encoder.layer.6.attention.output.LayerNorm.weight', False), 
('roberta.encoder.layer.6.attention.output.LayerNorm.bias', False), ('roberta.encoder.layer.6.intermediate.dense.weight', False), ('roberta.encoder.layer.6.intermediate.dense.bias', False), ('roberta.encoder.layer.6.output.dense.weight', False), ('roberta.encoder.layer.6.output.dense.bias', False), ('roberta.encoder.layer.6.output.LayerNorm.weight', False), ('roberta.encoder.layer.6.output.LayerNorm.bias', False), ('roberta.encoder.layer.8.attention.self.query.weight', False), ('roberta.encoder.layer.8.attention.self.query.bi
"\n",
"\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:01<00:00, 15.42ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.47ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 19.76ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [0, 118, 109, 465, 92, 964, 939, 475, 164, 7, 860, 1823, 543, 7, 146, 106, 1095, 8, 114, 939, 2845, 14, 939, 218, 326, 236, 7, 619, 2581, 456, 8, 95, 3068, 66, 5, 94, 76, 9, 334, 15, 127, 308, 939, 475, 164, 7, 33, 7, 860, 1823, 543, 45, 7, 575, 99, 82, 206, 9, 162, 145, 10, 784, 9604, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [0, 118, 553, 106, 7, 1962, 162, 11, 2351, 10, 232, 147, 70, 76, 793, 1972, 115, 1733, 62, 2157, 7917, 8, 2247, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [0, 118, 619, 77, 47, 32, 10, 10837, 621, 47, 5696, 97, 10837, 82, 88, 110, 301, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:725] 2023-02-14 21:47:19,642 >> The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:47:19,649 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:47:19,649 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:47:19,649 >> Num Epochs = 1\n",
"[INFO|trainer.py:1610] 2023-02-14 21:47:19,649 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:47:19,649 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:47:19,649 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:47:19,649 >> Total optimization steps = 667\n",
"{'loss': 0.8955, 'learning_rate': 5.0074962518740634e-06, 'epoch': 0.75}\n",
" 75% 500/667 [00:58<00:19, 8.75it/s][INFO|trainer.py:2656] 2023-02-14 21:48:17,996 >> Saving model checkpoint to out/emotion/roberta_custom/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:48:17,997 >> Configuration saved in out/emotion/roberta_custom/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:48:19,015 >> Model weights saved in out/emotion/roberta_custom/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:48:19,016 >> tokenizer config file saved in out/emotion/roberta_custom/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:48:19,016 >> Special tokens file saved in out/emotion/roberta_custom/checkpoint-500/special_tokens_map.json\n",
"100% 666/667 [01:20<00:00, 8.66it/s][INFO|trainer.py:1852] 2023-02-14 21:48:40,745 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'train_runtime': 81.0963, 'train_samples_per_second': 197.296, 'train_steps_per_second': 8.225, 'train_loss': 0.8004468377383573, 'epoch': 1.0}\n",
"100% 667/667 [01:21<00:00, 8.23it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:48:40,747 >> Saving model checkpoint to out/emotion/roberta_custom\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:48:40,748 >> Configuration saved in out/emotion/roberta_custom/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:48:41,796 >> Model weights saved in out/emotion/roberta_custom/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:48:41,797 >> tokenizer config file saved in out/emotion/roberta_custom/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:48:41,797 >> Special tokens file saved in out/emotion/roberta_custom/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 1.0\n",
" train_loss = 0.8004\n",
" train_runtime = 0:01:21.09\n",
" train_samples = 16000\n",
" train_samples_per_second = 197.296\n",
" train_steps_per_second = 8.225\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:48:41,898 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:48:41,899 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:48:41,900 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:48:41,900 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.62it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.0\n",
" eval_accuracy = 0.867\n",
" eval_loss = 0.39\n",
" eval_runtime = 0:00:03.59\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 555.583\n",
" eval_steps_per_second = 23.334\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:48:45,503 >> The following columns in the test set don't have a corresponding argument in `RobertaForSequenceClassificationCustomAlternative.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassificationCustomAlternative.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:48:45,504 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:48:45,504 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:48:45,504 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 23.74it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:48:49,211 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.8669999837875366}]}\n"
]
}
]
},
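  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The `Frozen layers:` listing above shows that the even-numbered encoder layers (0, 2, 4, ...) of the custom RoBERTa model have `requires_grad` set to `False`. As a rough, hypothetical illustration only (using the stock `roberta-base` classifier as a stand-in for the project's `RobertaForSequenceClassificationCustomAlternative`), such a freezing scheme could be produced like this:\n",
    "\n",
    "```python\n",
    "from transformers import AutoModelForSequenceClassification\n",
    "\n",
    "model = AutoModelForSequenceClassification.from_pretrained('roberta-base', num_labels=6)\n",
    "\n",
    "# Freeze every other encoder layer (0, 2, 4, ...), matching the pattern in the log above.\n",
    "for idx, layer in enumerate(model.roberta.encoder.layer):\n",
    "    if idx % 2 == 0:\n",
    "        for param in layer.parameters():\n",
    "            param.requires_grad = False\n",
    "\n",
    "print('Frozen layers:')\n",
    "print([(name, p.requires_grad) for name, p in model.named_parameters() if not p.requires_grad])\n",
    "```"
   ]
  },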
{
"cell_type": "markdown",
"metadata": {
"id": "HUdoRk5o7ICl"
},
"source": [
"## **GPT2**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "exFg0yb-7ICl"
},
"source": [
"- full data\n",
"- model `GPT2`\n",
"- sequnece length: 128\n",
"- training epoch: 1"
]
},
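  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The next cell fine-tunes `gpt2` with the parameters listed above. As a rough orientation, here is a minimal, hypothetical sketch of an equivalent setup with the `transformers` Trainer API; helper names such as `tokenize` and `compute_metrics` are illustrative, not the project's actual code, while the data paths, hyperparameters, and the pad-token workaround mirror the log below:\n",
    "\n",
    "```python\n",
    "import numpy as np\n",
    "import evaluate\n",
    "from datasets import load_dataset\n",
    "from transformers import (AutoModelForSequenceClassification, AutoTokenizer,\n",
    "                          Trainer, TrainingArguments)\n",
    "\n",
    "raw = load_dataset('json', data_files={'train': 'data/train.json',\n",
    "                                       'validation': 'data/valid.json',\n",
    "                                       'test': 'data/test.json'})\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained('gpt2')\n",
    "tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token; reuse EOS (as in the log below)\n",
    "\n",
    "model = AutoModelForSequenceClassification.from_pretrained('gpt2', num_labels=6)\n",
    "model.config.pad_token_id = tokenizer.eos_token_id\n",
    "\n",
    "def tokenize(batch):\n",
    "    # Pad/truncate every example to the fixed sequence length of 128 tokens.\n",
    "    return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=128)\n",
    "\n",
    "tokenized = raw.map(tokenize, batched=True)\n",
    "\n",
    "accuracy = evaluate.load('accuracy')\n",
    "\n",
    "def compute_metrics(eval_pred):\n",
    "    logits, labels = eval_pred\n",
    "    if isinstance(logits, tuple):  # some models also return past_key_values\n",
    "        logits = logits[0]\n",
    "    return accuracy.compute(predictions=np.argmax(logits, axis=-1), references=labels)\n",
    "\n",
    "args = TrainingArguments(\n",
    "    output_dir='out/emotion/gpt2',\n",
    "    learning_rate=2e-05,\n",
    "    per_device_train_batch_size=24,\n",
    "    per_device_eval_batch_size=24,\n",
    "    max_steps=2500,\n",
    "    evaluation_strategy='steps',\n",
    "    eval_steps=250,\n",
    "    save_steps=500,\n",
    "    load_best_model_at_end=True,\n",
    "    metric_for_best_model='accuracy',\n",
    ")\n",
    "\n",
    "trainer = Trainer(model=model, args=args,\n",
    "                  train_dataset=tokenized['train'],\n",
    "                  eval_dataset=tokenized['validation'],\n",
    "                  tokenizer=tokenizer,\n",
    "                  compute_metrics=compute_metrics)\n",
    "trainer.train()\n",
    "```"
   ]
  },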
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "DMHK35db7ICl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5a3776f5-7feb-480b-a433-a80ed81f3eb7"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:48:52.605236: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:48:52.757779: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:48:53.540701: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:48:53.540799: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:48:53.540819: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/gpt2,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/gpt2,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 12169.16it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2183.40it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 665.62it/s]\n",
"Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 125kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:57,052 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:57,053 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:48:57,145 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:57,236 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:57,237 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.20MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 6.19MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.7MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file vocab.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file merges.txt from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file tokenizer.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:48:58,447 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:48:58,447 >> loading configuration file config.json from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:48:58,448 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using implementation from class: AutoModelForSequenceClassification\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 108MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:49:03,784 >> loading weights file pytorch_model.bin from cache at gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 21:49:05,169 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassification.\n",
"\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:49:05,169 >> Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"[ERROR|tokenization_utils_base.py:1019] 2023-02-14 21:49:05,177 >> Using pad_token, but it is not set yet.\n",
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 20.23ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.04ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.92ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:503] 2023-02-14 21:49:08,712 >> max_steps is given, it will override any value given in num_train_epochs\n",
"[INFO|trainer.py:725] 2023-02-14 21:49:08,712 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:49:08,718 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:49:08,718 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:49:08,718 >> Num Epochs = 4\n",
"[INFO|trainer.py:1610] 2023-02-14 21:49:08,719 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:49:08,719 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:49:08,719 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:49:08,719 >> Total optimization steps = 2500\n",
"{'loss': 2.3442, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.15}\n",
"{'loss': 1.3126, 'learning_rate': 1.8400000000000003e-05, 'epoch': 0.3}\n",
" 10% 250/2500 [00:37<05:31, 6.79it/s][INFO|trainer.py:725] 2023-02-14 21:49:46,426 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:49:46,428 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:49:46,428 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:49:46,428 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 4% 3/84 [00:00<00:02, 29.40it/s]\u001b[A\n",
" 7% 6/84 [00:00<00:03, 23.74it/s]\u001b[A\n",
" 11% 9/84 [00:00<00:03, 22.40it/s]\u001b[A\n",
" 14% 12/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 18% 15/84 [00:00<00:03, 21.50it/s]\u001b[A\n",
" 21% 18/84 [00:00<00:03, 21.30it/s]\u001b[A\n",
" 25% 21/84 [00:00<00:02, 21.20it/s]\u001b[A\n",
" 29% 24/84 [00:01<00:02, 20.97it/s]\u001b[A\n",
" 32% 27/84 [00:01<00:02, 20.93it/s]\u001b[A\n",
" 36% 30/84 [00:01<00:02, 20.97it/s]\u001b[A\n",
" 39% 33/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 43% 36/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 46% 39/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 50% 42/84 [00:01<00:01, 21.03it/s]\u001b[A\n",
" 54% 45/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 57% 48/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 61% 51/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 64% 54/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 68% 57/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 71% 60/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 75% 63/84 [00:02<00:00, 21.00it/s]\u001b[A\n",
" 79% 66/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 82% 69/84 [00:03<00:00, 20.94it/s]\u001b[A\n",
" 86% 72/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 89% 75/84 [00:03<00:00, 20.98it/s]\u001b[A\n",
" 93% 78/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 96% 81/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
"100% 84/84 [00:03<00:00, 22.24it/s]\u001b[A\n",
"{'eval_loss': 0.7983964085578918, 'eval_accuracy': 0.7465000152587891, 'eval_runtime': 3.9877, 'eval_samples_per_second': 501.548, 'eval_steps_per_second': 21.065, 'epoch': 0.37}\n",
"\n",
" 10% 250/2500 [00:41<05:31, 6.79it/s]\n",
"{'loss': 0.7216, 'learning_rate': 1.76e-05, 'epoch': 0.45}\n",
"{'loss': 0.5032, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.6}\n",
"{'loss': 0.3904, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.75}\n",
" 20% 500/2500 [01:18<04:56, 6.74it/s][INFO|trainer.py:725] 2023-02-14 21:50:27,312 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:50:27,314 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:50:27,314 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:50:27,314 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.77it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.71it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.34it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.72it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.09it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.95it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.92it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.87it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.91it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.95it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.91it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.82it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.87it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.90it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
"{'eval_loss': 0.29131895303726196, 'eval_accuracy': 0.9035000205039978, 'eval_runtime': 3.9922, 'eval_samples_per_second': 500.974, 'eval_steps_per_second': 21.041, 'epoch': 0.75}\n",
"\n",
" 20% 500/2500 [01:22<04:56, 6.74it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:50:31,307 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:50:31,308 >> Configuration saved in out/emotion/gpt2/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:50:32,356 >> Model weights saved in out/emotion/gpt2/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:50:32,357 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:50:32,357 >> Special tokens file saved in out/emotion/gpt2/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.3554, 'learning_rate': 1.5200000000000002e-05, 'epoch': 0.9}\n",
"{'loss': 0.2871, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.05}\n",
" 30% 750/2500 [02:02<04:19, 6.74it/s][INFO|trainer.py:725] 2023-02-14 21:51:11,104 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:51:11,106 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:51:11,106 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:51:11,106 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.92it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.90it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.57it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.98it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.63it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.31it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.17it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.03it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.02it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.04it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.07it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.07it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.04it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
"{'eval_loss': 0.2168988287448883, 'eval_accuracy': 0.9235000014305115, 'eval_runtime': 3.9688, 'eval_samples_per_second': 503.925, 'eval_steps_per_second': 21.165, 'epoch': 1.12}\n",
"\n",
" 30% 750/2500 [02:06<04:19, 6.74it/s]\n",
"{'loss': 0.2285, 'learning_rate': 1.3600000000000002e-05, 'epoch': 1.2}\n",
"{'loss': 0.1888, 'learning_rate': 1.2800000000000001e-05, 'epoch': 1.35}\n",
"{'loss': 0.2106, 'learning_rate': 1.2e-05, 'epoch': 1.5}\n",
" 40% 1000/2500 [02:43<03:41, 6.78it/s][INFO|trainer.py:725] 2023-02-14 21:51:51,748 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:51:51,749 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:51:51,750 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:51:51,750 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 28.08it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.96it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.63it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.99it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.68it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.48it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.32it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.23it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.15it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.05it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.05it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.04it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.03it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
"{'eval_loss': 0.19490236043930054, 'eval_accuracy': 0.9259999990463257, 'eval_runtime': 3.9658, 'eval_samples_per_second': 504.311, 'eval_steps_per_second': 21.181, 'epoch': 1.5}\n",
"\n",
" 40% 1000/2500 [02:46<03:41, 6.78it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:51:55,716 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:51:55,717 >> Configuration saved in out/emotion/gpt2/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:51:56,708 >> Model weights saved in out/emotion/gpt2/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:51:56,709 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:51:56,709 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.1906, 'learning_rate': 1.1200000000000001e-05, 'epoch': 1.65}\n",
"{'loss': 0.1793, 'learning_rate': 1.04e-05, 'epoch': 1.8}\n",
" 50% 1250/2500 [03:26<03:04, 6.76it/s][INFO|trainer.py:725] 2023-02-14 21:52:35,220 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:52:35,222 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:52:35,222 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:52:35,222 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.99it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.91it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.61it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 22.00it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.66it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.45it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.34it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.26it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.21it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.17it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.11it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.11it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.06it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.08it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.09it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.09it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.08it/s]\u001b[A\n",
"{'eval_loss': 0.1607103943824768, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 3.9612, 'eval_samples_per_second': 504.895, 'eval_steps_per_second': 21.206, 'epoch': 1.87}\n",
"\n",
" 50% 1250/2500 [03:30<03:04, 6.76it/s]\n",
"{'loss': 0.2116, 'learning_rate': 9.600000000000001e-06, 'epoch': 1.95}\n",
"{'loss': 0.1536, 'learning_rate': 8.8e-06, 'epoch': 2.1}\n",
"{'loss': 0.1518, 'learning_rate': 8.000000000000001e-06, 'epoch': 2.25}\n",
" 60% 1500/2500 [04:07<02:26, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:53:15,831 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:53:15,833 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:53:15,833 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:53:15,833 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 28.10it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.90it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.58it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.53it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.37it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.27it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.19it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.13it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.11it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.04it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.94it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.94it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.98it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.93it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.94it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.98it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
"{'eval_loss': 0.160899356007576, 'eval_accuracy': 0.9330000281333923, 'eval_runtime': 3.9773, 'eval_samples_per_second': 502.855, 'eval_steps_per_second': 21.12, 'epoch': 2.25}\n",
"\n",
" 60% 1500/2500 [04:11<02:26, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:53:19,811 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:53:19,812 >> Configuration saved in out/emotion/gpt2/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:53:21,455 >> Model weights saved in out/emotion/gpt2/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:53:21,456 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:53:21,456 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.157, 'learning_rate': 7.2000000000000005e-06, 'epoch': 2.4}\n",
"{'loss': 0.141, 'learning_rate': 6.4000000000000006e-06, 'epoch': 2.55}\n",
" 70% 1750/2500 [04:51<01:50, 6.80it/s][INFO|trainer.py:725] 2023-02-14 21:54:00,007 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:54:00,009 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:54:00,009 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:54:00,009 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.89it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.82it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.49it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.48it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.31it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.20it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.99it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.00it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.98it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.98it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.01it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
"{'eval_loss': 0.15204769372940063, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 3.9769, 'eval_samples_per_second': 502.901, 'eval_steps_per_second': 21.122, 'epoch': 2.62}\n",
"\n",
" 70% 1750/2500 [04:55<01:50, 6.80it/s]\n",
"{'loss': 0.1426, 'learning_rate': 5.600000000000001e-06, 'epoch': 2.7}\n",
"{'loss': 0.1463, 'learning_rate': 4.800000000000001e-06, 'epoch': 2.85}\n",
"{'loss': 0.1403, 'learning_rate': 4.000000000000001e-06, 'epoch': 3.0}\n",
" 80% 2000/2500 [05:31<01:13, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:54:40,633 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:54:40,635 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:54:40,635 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:54:40,635 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.95it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.86it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.54it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.95it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.60it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.42it/s]\u001b[A\n",
" 26% 22/84 [00:00<00:02, 21.29it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.08it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.05it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.06it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.04it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.00it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.96it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.94it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.95it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
"{'eval_loss': 0.14609387516975403, 'eval_accuracy': 0.9290000200271606, 'eval_runtime': 3.9774, 'eval_samples_per_second': 502.846, 'eval_steps_per_second': 21.12, 'epoch': 3.0}\n",
"\n",
" 80% 2000/2500 [05:35<01:13, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:54:44,614 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:54:44,615 >> Configuration saved in out/emotion/gpt2/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:54:46,838 >> Model weights saved in out/emotion/gpt2/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:54:46,839 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:54:46,839 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2000/special_tokens_map.json\n",
"{'loss': 0.1256, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.15}\n",
"{'loss': 0.1246, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.3}\n",
" 90% 2250/2500 [06:16<00:36, 6.76it/s][INFO|trainer.py:725] 2023-02-14 21:55:25,309 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:55:25,311 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:55:25,311 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:55:25,311 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.89it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.86it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.52it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.87it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.57it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.40it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.29it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.18it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.15it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.14it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.10it/s]\u001b[A\n",
" 51% 43/84 [00:01<00:01, 21.09it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.09it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 21.10it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 21.06it/s]\u001b[A\n",
" 76% 64/84 [00:02<00:00, 21.06it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 21.07it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.06it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.00it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.01it/s]\u001b[A\n",
"{'eval_loss': 0.15553689002990723, 'eval_accuracy': 0.9294999837875366, 'eval_runtime': 3.967, 'eval_samples_per_second': 504.158, 'eval_steps_per_second': 21.175, 'epoch': 3.37}\n",
"\n",
" 90% 2250/2500 [06:20<00:36, 6.76it/s]\n",
"{'loss': 0.1174, 'learning_rate': 1.6000000000000001e-06, 'epoch': 3.45}\n",
"{'loss': 0.1374, 'learning_rate': 8.000000000000001e-07, 'epoch': 3.6}\n",
"{'loss': 0.1207, 'learning_rate': 0.0, 'epoch': 3.75}\n",
"100% 2500/2500 [06:57<00:00, 6.82it/s][INFO|trainer.py:725] 2023-02-14 21:56:05,969 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:05,971 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:05,971 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:05,971 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.94it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.89it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 22.60it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.97it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 21.57it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 21.34it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:02, 21.23it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 21.12it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 21.09it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 21.07it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 21.06it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 21.01it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:01, 21.03it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 21.02it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.97it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.45it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.64it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.77it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.84it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.92it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.97it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.99it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 21.02it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 21.03it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 21.04it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 21.05it/s]\u001b[A\n",
"{'eval_loss': 0.15162073075771332, 'eval_accuracy': 0.9309999942779541, 'eval_runtime': 3.9841, 'eval_samples_per_second': 501.992, 'eval_steps_per_second': 21.084, 'epoch': 3.75}\n",
"\n",
"100% 2500/2500 [07:01<00:00, 6.82it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:56:09,956 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:56:09,957 >> Configuration saved in out/emotion/gpt2/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:56:10,953 >> Model weights saved in out/emotion/gpt2/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:56:10,954 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:56:10,954 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2500/special_tokens_map.json\n",
"[INFO|trainer.py:1852] 2023-02-14 21:56:12,777 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 21:56:12,778 >> Loading best model from out/emotion/gpt2/checkpoint-1500 (score: 0.9330000281333923).\n",
"{'train_runtime': 424.4983, 'train_samples_per_second': 141.343, 'train_steps_per_second': 5.889, 'train_loss': 0.351297896194458, 'epoch': 3.75}\n",
"100% 2500/2500 [07:04<00:00, 5.89it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 21:56:13,218 >> Saving model checkpoint to out/emotion/gpt2\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:56:13,220 >> Configuration saved in out/emotion/gpt2/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:56:14,063 >> Model weights saved in out/emotion/gpt2/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:56:14,064 >> tokenizer config file saved in out/emotion/gpt2/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:56:14,064 >> Special tokens file saved in out/emotion/gpt2/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 3.75\n",
" train_loss = 0.3513\n",
" train_runtime = 0:07:04.49\n",
" train_samples = 16000\n",
" train_samples_per_second = 141.343\n",
" train_steps_per_second = 5.889\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:14,169 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:14,170 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:14,170 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:14,170 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 21.20it/s]\n",
"***** eval metrics *****\n",
" epoch = 3.75\n",
" eval_accuracy = 0.933\n",
" eval_loss = 0.1609\n",
" eval_runtime = 0:00:04.02\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 497.496\n",
" eval_steps_per_second = 20.895\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:18,194 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassification.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassification.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:56:18,195 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:56:18,195 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:56:18,195 >> Batch size = 24\n",
"100% 84/84 [00:03<00:00, 21.40it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 21:56:22,304 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9330000281333923}]}\n"
]
}
],
"source": [
"!python run_glue.py \\\n",
" --cache_dir gtp_cache_training \\\n",
" --model_name_or_path gpt2 \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/gpt2 \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
]
},
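{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Quick inference check (sketch).** The snippet below is not part of the training run above; it is only a minimal, illustrative way to load the checkpoint saved in `out/emotion/gpt2` and classify a single sentence. The saved config only knows the generic `LABEL_0`..`LABEL_5` names, so the mapping back to the dataset's emotion names is added here by hand.\n",
"\n",
"```python\n",
"from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline\n",
"\n",
"model_dir = \"out/emotion/gpt2\"  # checkpoint directory produced by the run above\n",
"tokenizer = AutoTokenizer.from_pretrained(model_dir)\n",
"model = AutoModelForSequenceClassification.from_pretrained(model_dir)\n",
"\n",
"# The fine-tuned config maps ids to LABEL_0..LABEL_5; translate them to emotion names by hand.\n",
"id2emotion = {0: \"sadness\", 1: \"joy\", 2: \"love\", 3: \"anger\", 4: \"fear\", 5: \"surprise\"}\n",
"\n",
"clf = pipeline(\"text-classification\", model=model, tokenizer=tokenizer)\n",
"pred = clf(\"i feel hopeful and powerful\")[0]\n",
"print(id2emotion[int(pred[\"label\"].split(\"_\")[-1])], round(pred[\"score\"], 3))\n",
"```"
]
},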
{
"cell_type": "markdown",
"source": [
"- full dataset\n",
"- custom head"
],
"metadata": {
"id": "zJeUGay5n1JW"
}
},
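{
"cell_type": "markdown",
"metadata": {},
"source": [
"The head selected with `--custom_model gpt2_custom` is implemented in the project's `run_glue.py` and is not reproduced in this notebook. Judging from the newly initialized parameters reported in the training log below (`score.dense_1_input`, `score.dense_1_hidden`, `score.dense_2`, `score.out_proj`), it replaces the single linear `score` layer of the stock `GPT2ForSequenceClassification` with a small MLP. The sketch below is an approximation of that idea only; layer sizes and activations are guesses, not the exact implementation.\n",
"\n",
"```python\n",
"import torch\n",
"import torch.nn as nn\n",
"\n",
"class GPT2ClassificationHeadSketch(nn.Module):\n",
"    \"\"\"Approximate MLP head; attribute names follow the parameters listed in the log.\"\"\"\n",
"\n",
"    def __init__(self, hidden_size: int = 768, num_labels: int = 6, dropout: float = 0.1):\n",
"        super().__init__()\n",
"        self.dense_1_input = nn.Linear(hidden_size, hidden_size)   # path fed with the last hidden state\n",
"        self.dense_1_hidden = nn.Linear(hidden_size, hidden_size)  # second input path, unused here (log: Using hidden states in model: False)\n",
"        self.dense_2 = nn.Linear(hidden_size, hidden_size)\n",
"        self.out_proj = nn.Linear(hidden_size, num_labels, bias=False)  # log lists only out_proj.weight, hence no bias\n",
"        self.dropout = nn.Dropout(dropout)\n",
"        self.act = nn.ReLU()\n",
"\n",
"    def forward(self, hidden_state):\n",
"        x = self.act(self.dense_1_input(self.dropout(hidden_state)))\n",
"        x = self.act(self.dense_2(self.dropout(x)))\n",
"        return self.out_proj(self.dropout(x))\n",
"\n",
"head = GPT2ClassificationHeadSketch()\n",
"print(head(torch.randn(2, 768)).shape)  # torch.Size([2, 6])\n",
"```"
]
},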
{
"cell_type": "code",
"source": [
"!python run_glue.py \\\n",
" --cache_dir gtp_custom_cache_training \\\n",
" --model_name_or_path gpt2 \\\n",
" --custom_model gpt2_custom \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 24 \\\n",
" --per_device_eval_batch_size 24 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --learning_rate 2e-5 \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/gpt2_custom \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LXRMDiD-n1nG",
"outputId": "1383e6a3-b485-49a0-d111-05bea71acd23"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 21:56:25.884599: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 21:56:26.040127: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 21:56:26.823479: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:56:26.823595: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 21:56:26.823615: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/gpt2_custom,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/gpt2_custom,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-01aa9d8252a24a0d\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 14138.10it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2175.09it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 672.49it/s]\n",
"Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 123kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:30,068 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:30,068 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:418] 2023-02-14 21:56:30,162 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:30,251 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:30,252 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.18MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.90MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 14.3MB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file vocab.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file merges.txt from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file tokenizer.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 21:56:31,525 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:653] 2023-02-14 21:56:31,525 >> loading configuration file config.json from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 21:56:31,526 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: False\n",
"INFO:__main__:Using implementation from class: GPT2ForSequenceClassificationCustom\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 108MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 21:56:36,895 >> loading weights file pytorch_model.bin from cache at gtp_custom_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 21:56:39,410 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
"\n",
"[WARNING|modeling_utils.py:2608] 2023-02-14 21:56:39,410 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_hidden.bias', 'score.dense_1_input.weight', 'score.dense_2.bias', 'score.dense_2.weight', 'score.out_proj.weight', 'score.dense_1_hidden.weight', 'score.dense_1_input.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"[ERROR|tokenization_utils_base.py:1019] 2023-02-14 21:56:39,418 >> Using pad_token, but it is not set yet.\n",
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:00<00:00, 19.61ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 20.48ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/gtp_custom_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 7.71ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:503] 2023-02-14 21:56:42,941 >> max_steps is given, it will override any value given in num_train_epochs\n",
"[INFO|trainer.py:725] 2023-02-14 21:56:42,941 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 21:56:42,947 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 21:56:42,947 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 21:56:42,947 >> Num Epochs = 4\n",
"[INFO|trainer.py:1610] 2023-02-14 21:56:42,947 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1611] 2023-02-14 21:56:42,947 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1612] 2023-02-14 21:56:42,947 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 21:56:42,947 >> Total optimization steps = 2500\n",
"{'loss': 1.6218, 'learning_rate': 1.9200000000000003e-05, 'epoch': 0.15}\n",
"{'loss': 1.1593, 'learning_rate': 1.8400000000000003e-05, 'epoch': 0.3}\n",
" 10% 250/2500 [00:39<05:43, 6.56it/s][INFO|trainer.py:725] 2023-02-14 21:57:22,025 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:57:22,027 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:57:22,027 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:57:22,027 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.97it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 22.99it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.18it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.66it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.55it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.44it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.32it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.27it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
"{'eval_loss': 0.6981180310249329, 'eval_accuracy': 0.7329999804496765, 'eval_runtime': 4.1201, 'eval_samples_per_second': 485.426, 'eval_steps_per_second': 20.388, 'epoch': 0.37}\n",
"\n",
" 10% 250/2500 [00:43<05:43, 6.56it/s]\n",
"{'loss': 0.8016, 'learning_rate': 1.76e-05, 'epoch': 0.45}\n",
"{'loss': 0.5481, 'learning_rate': 1.6800000000000002e-05, 'epoch': 0.6}\n",
"{'loss': 0.4045, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.75}\n",
" 20% 500/2500 [01:21<05:03, 6.58it/s][INFO|trainer.py:725] 2023-02-14 21:58:04,246 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:58:04,248 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:58:04,248 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:58:04,248 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.97it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.02it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.20it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.19it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.20it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.21it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.23it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.23it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.24it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.25it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.24it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.24it/s]\u001b[A\n",
"{'eval_loss': 0.29522550106048584, 'eval_accuracy': 0.9100000262260437, 'eval_runtime': 4.1309, 'eval_samples_per_second': 484.153, 'eval_steps_per_second': 20.334, 'epoch': 0.75}\n",
"\n",
" 20% 500/2500 [01:25<05:03, 6.58it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:58:08,380 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:58:08,381 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:58:09,983 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:58:09,984 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:58:09,984 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.356, 'learning_rate': 1.5200000000000002e-05, 'epoch': 0.9}\n",
"{'loss': 0.2714, 'learning_rate': 1.4400000000000001e-05, 'epoch': 1.05}\n",
" 30% 750/2500 [02:07<04:25, 6.59it/s][INFO|trainer.py:725] 2023-02-14 21:58:49,972 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:58:49,973 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:58:49,974 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:58:49,974 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.11it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.85it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.25it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.89it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.67it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.56it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.48it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.42it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.37it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.32it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.27it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.31it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
"{'eval_loss': 0.22870442271232605, 'eval_accuracy': 0.9200000166893005, 'eval_runtime': 4.1118, 'eval_samples_per_second': 486.403, 'eval_steps_per_second': 20.429, 'epoch': 1.12}\n",
"\n",
" 30% 750/2500 [02:11<04:25, 6.59it/s]\n",
"{'loss': 0.2332, 'learning_rate': 1.3600000000000002e-05, 'epoch': 1.2}\n",
"{'loss': 0.2135, 'learning_rate': 1.2800000000000001e-05, 'epoch': 1.35}\n",
"{'loss': 0.2283, 'learning_rate': 1.2e-05, 'epoch': 1.5}\n",
" 40% 1000/2500 [02:49<03:48, 6.57it/s][INFO|trainer.py:725] 2023-02-14 21:59:32,169 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 21:59:32,170 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 21:59:32,170 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 21:59:32,171 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.03it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.07it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.78it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.17it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.84it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.62it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.52it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.36it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.33it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.31it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.28it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.14it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.18it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.31it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.25it/s]\u001b[A\n",
"{'eval_loss': 0.16501356661319733, 'eval_accuracy': 0.9319999814033508, 'eval_runtime': 4.1217, 'eval_samples_per_second': 485.232, 'eval_steps_per_second': 20.38, 'epoch': 1.5}\n",
"\n",
" 40% 1000/2500 [02:53<03:48, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 21:59:36,293 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 21:59:36,294 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 21:59:37,744 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 21:59:37,744 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 21:59:37,744 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.1836, 'learning_rate': 1.1200000000000001e-05, 'epoch': 1.65}\n",
"{'loss': 0.1844, 'learning_rate': 1.04e-05, 'epoch': 1.8}\n",
" 50% 1250/2500 [03:34<03:09, 6.59it/s][INFO|trainer.py:725] 2023-02-14 22:00:17,827 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:00:17,829 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:00:17,829 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:00:17,829 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.06it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.79it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.21it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.88it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.55it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.47it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.27it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.28it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.26it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.26it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
"{'eval_loss': 0.15909001231193542, 'eval_accuracy': 0.9355000257492065, 'eval_runtime': 4.1177, 'eval_samples_per_second': 485.712, 'eval_steps_per_second': 20.4, 'epoch': 1.87}\n",
"\n",
" 50% 1250/2500 [03:38<03:09, 6.59it/s]\n",
"{'loss': 0.2181, 'learning_rate': 9.600000000000001e-06, 'epoch': 1.95}\n",
"{'loss': 0.1695, 'learning_rate': 8.8e-06, 'epoch': 2.1}\n",
"{'loss': 0.1683, 'learning_rate': 8.000000000000001e-06, 'epoch': 2.25}\n",
" 60% 1500/2500 [04:17<02:32, 6.55it/s][INFO|trainer.py:725] 2023-02-14 22:00:59,986 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:00:59,988 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:00:59,988 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:00:59,988 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.10it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.06it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.79it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.16it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.86it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.52it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.45it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.24it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.11it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.12it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.17it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.19it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.23it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.20it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.20it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.20it/s]\u001b[A\n",
"{'eval_loss': 0.1472882628440857, 'eval_accuracy': 0.934499979019165, 'eval_runtime': 4.13, 'eval_samples_per_second': 484.258, 'eval_steps_per_second': 20.339, 'epoch': 2.25}\n",
"\n",
" 60% 1500/2500 [04:21<02:32, 6.55it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:01:04,119 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:01:04,120 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:01:05,576 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:01:05,576 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:01:05,576 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.1497, 'learning_rate': 7.2000000000000005e-06, 'epoch': 2.4}\n",
"{'loss': 0.1496, 'learning_rate': 6.4000000000000006e-06, 'epoch': 2.55}\n",
" 70% 1750/2500 [05:02<01:54, 6.54it/s][INFO|trainer.py:725] 2023-02-14 22:01:45,617 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:01:45,618 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:01:45,619 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:01:45,619 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 26.78it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 22.79it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.58it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.03it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.70it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.49it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.30it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.19it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.16it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.15it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.14it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.12it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.09it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.08it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.10it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.13it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.19it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.22it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.25it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.25it/s]\u001b[A\n",
"{'eval_loss': 0.14743593335151672, 'eval_accuracy': 0.9359999895095825, 'eval_runtime': 4.1413, 'eval_samples_per_second': 482.944, 'eval_steps_per_second': 20.284, 'epoch': 2.62}\n",
"\n",
" 70% 1750/2500 [05:06<01:54, 6.54it/s]\n",
"{'loss': 0.1465, 'learning_rate': 5.600000000000001e-06, 'epoch': 2.7}\n",
"{'loss': 0.1376, 'learning_rate': 4.800000000000001e-06, 'epoch': 2.85}\n",
"{'loss': 0.1444, 'learning_rate': 4.000000000000001e-06, 'epoch': 3.0}\n",
" 80% 2000/2500 [05:44<01:16, 6.57it/s][INFO|trainer.py:725] 2023-02-14 22:02:27,845 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:02:27,846 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:02:27,846 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:02:27,846 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.04it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.04it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.75it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.18it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.85it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.61it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.49it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.43it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.39it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.14it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.16it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.21it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.22it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.22it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.19it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.20it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.22it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.24it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.21it/s]\u001b[A\n",
" 98% 82/84 [00:04<00:00, 20.20it/s]\u001b[A\n",
"{'eval_loss': 0.14364145696163177, 'eval_accuracy': 0.9365000128746033, 'eval_runtime': 4.1279, 'eval_samples_per_second': 484.505, 'eval_steps_per_second': 20.349, 'epoch': 3.0}\n",
"\n",
" 80% 2000/2500 [05:49<01:16, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:02:31,975 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:02:31,976 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:02:33,429 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:02:33,430 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:02:33,430 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-2000/special_tokens_map.json\n",
"{'loss': 0.104, 'learning_rate': 3.2000000000000003e-06, 'epoch': 3.15}\n",
"{'loss': 0.1206, 'learning_rate': 2.4000000000000003e-06, 'epoch': 3.3}\n",
" 90% 2250/2500 [06:30<00:38, 6.55it/s][INFO|trainer.py:725] 2023-02-14 22:03:13,484 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:03:13,486 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:03:13,486 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:03:13,486 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.11it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.10it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.81it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.22it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.88it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.68it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.56it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.47it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.41it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.38it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.34it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.33it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.26it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.17it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.21it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.21it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.23it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.25it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.28it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
"{'eval_loss': 0.15543130040168762, 'eval_accuracy': 0.9369999766349792, 'eval_runtime': 4.1171, 'eval_samples_per_second': 485.782, 'eval_steps_per_second': 20.403, 'epoch': 3.37}\n",
"\n",
" 90% 2250/2500 [06:34<00:38, 6.55it/s]\n",
"{'loss': 0.1289, 'learning_rate': 1.6000000000000001e-06, 'epoch': 3.45}\n",
"{'loss': 0.1231, 'learning_rate': 8.000000000000001e-07, 'epoch': 3.6}\n",
"{'loss': 0.1179, 'learning_rate': 0.0, 'epoch': 3.75}\n",
"100% 2500/2500 [07:12<00:00, 6.57it/s][INFO|trainer.py:725] 2023-02-14 22:03:55,704 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:03:55,705 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:03:55,705 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:03:55,706 >> Batch size = 24\n",
"\n",
" 0% 0/84 [00:00<?, ?it/s]\u001b[A\n",
" 5% 4/84 [00:00<00:02, 27.06it/s]\u001b[A\n",
" 8% 7/84 [00:00<00:03, 23.11it/s]\u001b[A\n",
" 12% 10/84 [00:00<00:03, 21.81it/s]\u001b[A\n",
" 15% 13/84 [00:00<00:03, 21.13it/s]\u001b[A\n",
" 19% 16/84 [00:00<00:03, 20.82it/s]\u001b[A\n",
" 23% 19/84 [00:00<00:03, 20.65it/s]\u001b[A\n",
" 26% 22/84 [00:01<00:03, 20.47it/s]\u001b[A\n",
" 30% 25/84 [00:01<00:02, 20.41it/s]\u001b[A\n",
" 33% 28/84 [00:01<00:02, 20.38it/s]\u001b[A\n",
" 37% 31/84 [00:01<00:02, 20.35it/s]\u001b[A\n",
" 40% 34/84 [00:01<00:02, 20.35it/s]\u001b[A\n",
" 44% 37/84 [00:01<00:02, 20.32it/s]\u001b[A\n",
" 48% 40/84 [00:01<00:02, 20.30it/s]\u001b[A\n",
" 51% 43/84 [00:02<00:02, 20.30it/s]\u001b[A\n",
" 55% 46/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 58% 49/84 [00:02<00:01, 20.30it/s]\u001b[A\n",
" 62% 52/84 [00:02<00:01, 20.29it/s]\u001b[A\n",
" 65% 55/84 [00:02<00:01, 20.31it/s]\u001b[A\n",
" 69% 58/84 [00:02<00:01, 20.28it/s]\u001b[A\n",
" 73% 61/84 [00:02<00:01, 20.26it/s]\u001b[A\n",
" 76% 64/84 [00:03<00:00, 20.24it/s]\u001b[A\n",
" 80% 67/84 [00:03<00:00, 20.26it/s]\u001b[A\n",
" 83% 70/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 87% 73/84 [00:03<00:00, 20.27it/s]\u001b[A\n",
" 90% 76/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 94% 79/84 [00:03<00:00, 20.29it/s]\u001b[A\n",
" 98% 82/84 [00:03<00:00, 20.30it/s]\u001b[A\n",
"{'eval_loss': 0.14437170326709747, 'eval_accuracy': 0.9350000023841858, 'eval_runtime': 4.116, 'eval_samples_per_second': 485.915, 'eval_steps_per_second': 20.408, 'epoch': 3.75}\n",
"\n",
"100% 2500/2500 [07:16<00:00, 6.57it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:03:59,822 >> Saving model checkpoint to out/emotion/gpt2_custom/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:03:59,823 >> Configuration saved in out/emotion/gpt2_custom/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:04:00,568 >> Model weights saved in out/emotion/gpt2_custom/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:04:00,569 >> tokenizer config file saved in out/emotion/gpt2_custom/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:04:00,569 >> Special tokens file saved in out/emotion/gpt2_custom/checkpoint-2500/special_tokens_map.json\n",
"[INFO|trainer.py:1852] 2023-02-14 22:04:02,582 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 22:04:02,582 >> Loading best model from out/emotion/gpt2_custom/checkpoint-2000 (score: 0.9365000128746033).\n",
"{'train_runtime': 440.0758, 'train_samples_per_second': 136.34, 'train_steps_per_second': 5.681, 'train_loss': 0.32335229415893557, 'epoch': 3.75}\n",
"100% 2500/2500 [07:20<00:00, 5.68it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 22:04:03,025 >> Saving model checkpoint to out/emotion/gpt2_custom\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:04:03,026 >> Configuration saved in out/emotion/gpt2_custom/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:04:03,965 >> Model weights saved in out/emotion/gpt2_custom/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:04:03,966 >> tokenizer config file saved in out/emotion/gpt2_custom/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:04:03,966 >> Special tokens file saved in out/emotion/gpt2_custom/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 3.75\n",
" train_loss = 0.3234\n",
" train_runtime = 0:07:20.07\n",
" train_samples = 16000\n",
" train_samples_per_second = 136.34\n",
" train_steps_per_second = 5.681\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:725] 2023-02-14 22:04:04,068 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:04:04,069 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:04,069 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:04,070 >> Batch size = 24\n",
"100% 84/84 [00:04<00:00, 20.35it/s]\n",
"***** eval metrics *****\n",
" epoch = 3.75\n",
" eval_accuracy = 0.9365\n",
" eval_loss = 0.1436\n",
" eval_runtime = 0:00:04.18\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 477.778\n",
" eval_steps_per_second = 20.067\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:725] 2023-02-14 22:04:08,259 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2907] 2023-02-14 22:04:08,260 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:08,260 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:08,260 >> Batch size = 24\n",
"100% 84/84 [00:04<00:00, 20.62it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:444] 2023-02-14 22:04:12,537 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9365000128746033}]}\n"
]
}
]
},
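{
"cell_type": "markdown",
"metadata": {},
"source": [
"The checkpoint saved above in `out/emotion/gpt2_custom` can be reloaded for quick inference. Below is a minimal sketch, assuming the exported config loads with the stock `AutoModelForSequenceClassification`; if the extra layers of `GPT2ForSequenceClassificationCustom` are required, that class would have to be imported and used instead.\n",
"\n",
"```python\n",
"import torch\n",
"from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
"\n",
"ckpt = \"out/emotion/gpt2_custom\"  # directory written by the training run above\n",
"tokenizer = AutoTokenizer.from_pretrained(ckpt)\n",
"model = AutoModelForSequenceClassification.from_pretrained(ckpt)\n",
"model.eval()\n",
"\n",
"labels = [\"sadness\", \"joy\", \"love\", \"anger\", \"fear\", \"surprise\"]\n",
"inputs = tokenizer(\"i feel wonderful today\", return_tensors=\"pt\")\n",
"with torch.no_grad():\n",
"    pred = model(**inputs).logits.argmax(dim=-1).item()\n",
"print(labels[pred])\n",
"```"
]
},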
{
"cell_type": "markdown",
"metadata": {
"id": "VrHmnOaT7ICl"
},
"source": [
"## **T5**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CmuDde477ICl"
},
"source": [
"- full data\n",
"- model `T5`\n",
"- sequnece length: 128\n",
"- training epoch: 1\n",
"- first few layers frozen"
]
},
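{
"cell_type": "markdown",
"metadata": {},
"source": [
"The run below reports the frozen encoder parameters explicitly (see the `Frozen layers` listing in its output). As a minimal sketch, and assuming the freezing is done by toggling `requires_grad` on encoder blocks 1-7 before the model is handed to the trainer (the helper actually used by the training script is not shown in this notebook), it could look like this:\n",
"\n",
"```python\n",
"from transformers import AutoModelForSeq2SeqLM\n",
"\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(\"google/t5-v1_1-small\")\n",
"\n",
"# Freeze encoder blocks 1-7; block 0 and the whole decoder stay trainable,\n",
"# matching the 'Frozen layers' list printed by the run below.\n",
"for name, param in model.named_parameters():\n",
"    if name.startswith(\"encoder.block.\") and int(name.split(\".\")[2]) >= 1:\n",
"        param.requires_grad = False\n",
"\n",
"frozen = [name for name, p in model.named_parameters() if not p.requires_grad]\n",
"print(len(frozen), \"frozen parameter tensors\")\n",
"```"
]
},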
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"id": "2ruXjeqj7ICl",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4d73b407-08c3-4007-aa32-c8709dd696fa"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-14 22:04:17.129470: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-14 22:04:17.281426: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-02-14 22:04:18.087509: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 22:04:18.087605: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-14 22:04:18.087624: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters Seq2SeqTrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=250,\n",
"evaluation_strategy=steps,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"generation_max_length=128,\n",
"generation_num_beams=None,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=True,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=5e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=True,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/emotion/t5_v1_1/runs/Feb14_22-04-20_fc0011e45a00,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=100,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=2500,\n",
"metric_for_best_model=accuracy,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=1.0,\n",
"optim=adamw_hf,\n",
"output_dir=out/emotion/t5_v1_1,\n",
"overwrite_output_dir=True,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=8,\n",
"per_device_train_batch_size=8,\n",
"predict_with_generate=True,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/emotion/t5_v1_1,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=5,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"sortish_sampler=False,\n",
"tf32=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"WARNING:datasets.builder:Using custom data configuration default-a82ca4164dba097e\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 11848.32it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 2097.85it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 953.83it/s]\n",
"Downloading (…)lve/main/config.json: 100% 537/537 [00:00<00:00, 97.0kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:20,972 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:20,975 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)okenizer_config.json: 100% 1.86k/1.86k [00:00<00:00, 853kB/s]\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,160 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,160 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)ve/main/spiece.model: 100% 792k/792k [00:00<00:00, 10.2MB/s]\n",
"Downloading (…)cial_tokens_map.json: 100% 1.79k/1.79k [00:00<00:00, 705kB/s]\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file spiece.model from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file tokenizer.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file special_tokens_map.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json\n",
"[INFO|tokenization_utils_base.py:1773] 2023-02-14 22:04:21,837 >> loading file tokenizer_config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,838 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,838 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"[INFO|configuration_utils.py:653] 2023-02-14 22:04:21,888 >> loading configuration file config.json from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json\n",
"[INFO|configuration_utils.py:705] 2023-02-14 22:04:21,889 >> Model config T5Config {\n",
" \"_name_or_path\": \"google/t5-v1_1-small\",\n",
" \"architectures\": [\n",
" \"T5ForConditionalGeneration\"\n",
" ],\n",
" \"d_ff\": 1024,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dense_act_fn\": \"gelu_new\",\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"gated-gelu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"is_gated_act\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"num_decoder_layers\": 8,\n",
" \"num_heads\": 6,\n",
" \"num_layers\": 8,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_max_distance\": 128,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"tie_word_embeddings\": false,\n",
" \"transformers_version\": \"4.23.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 308M/308M [00:03<00:00, 84.8MB/s]\n",
"[INFO|modeling_utils.py:2156] 2023-02-14 22:04:26,050 >> loading weights file pytorch_model.bin from cache at t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2606] 2023-02-14 22:04:27,048 >> All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
"\n",
"[INFO|modeling_utils.py:2614] 2023-02-14 22:04:27,048 >> All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at google/t5-v1_1-small.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
"\n",
"\n",
"Frozen layers:\n",
"[('encoder.block.1.layer.0.SelfAttention.q.weight', False), ('encoder.block.1.layer.0.SelfAttention.k.weight', False), ('encoder.block.1.layer.0.SelfAttention.v.weight', False), ('encoder.block.1.layer.0.SelfAttention.o.weight', False), ('encoder.block.1.layer.0.layer_norm.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.1.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.1.layer.1.layer_norm.weight', False), ('encoder.block.2.layer.0.SelfAttention.q.weight', False), ('encoder.block.2.layer.0.SelfAttention.k.weight', False), ('encoder.block.2.layer.0.SelfAttention.v.weight', False), ('encoder.block.2.layer.0.SelfAttention.o.weight', False), ('encoder.block.2.layer.0.layer_norm.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.2.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.2.layer.1.layer_norm.weight', False), ('encoder.block.3.layer.0.SelfAttention.q.weight', False), ('encoder.block.3.layer.0.SelfAttention.k.weight', False), ('encoder.block.3.layer.0.SelfAttention.v.weight', False), ('encoder.block.3.layer.0.SelfAttention.o.weight', False), ('encoder.block.3.layer.0.layer_norm.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.3.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.3.layer.1.layer_norm.weight', False), ('encoder.block.4.layer.0.SelfAttention.q.weight', False), ('encoder.block.4.layer.0.SelfAttention.k.weight', False), ('encoder.block.4.layer.0.SelfAttention.v.weight', False), ('encoder.block.4.layer.0.SelfAttention.o.weight', False), ('encoder.block.4.layer.0.layer_norm.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.4.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.4.layer.1.layer_norm.weight', False), ('encoder.block.5.layer.0.SelfAttention.q.weight', False), ('encoder.block.5.layer.0.SelfAttention.k.weight', False), ('encoder.block.5.layer.0.SelfAttention.v.weight', False), ('encoder.block.5.layer.0.SelfAttention.o.weight', False), ('encoder.block.5.layer.0.layer_norm.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.5.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.5.layer.1.layer_norm.weight', False), ('encoder.block.6.layer.0.SelfAttention.q.weight', False), ('encoder.block.6.layer.0.SelfAttention.k.weight', False), ('encoder.block.6.layer.0.SelfAttention.v.weight', False), ('encoder.block.6.layer.0.SelfAttention.o.weight', False), ('encoder.block.6.layer.0.layer_norm.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wi_0.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.6.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.6.layer.1.layer_norm.weight', False), ('encoder.block.7.layer.0.SelfAttention.q.weight', False), ('encoder.block.7.layer.0.SelfAttention.k.weight', False), ('encoder.block.7.layer.0.SelfAttention.v.weight', False), ('encoder.block.7.layer.0.SelfAttention.o.weight', False), ('encoder.block.7.layer.0.layer_norm.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wi_0.weight', False), 
('encoder.block.7.layer.1.DenseReluDense.wi_1.weight', False), ('encoder.block.7.layer.1.DenseReluDense.wo.weight', False), ('encoder.block.7.layer.1.layer_norm.weight', False)] \n",
"\n",
"\n",
"INFO:__main__:Using translation prefix: \"emotion classification: \"\n",
"Running tokenizer on train dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-fa17416eabe18767.arrow\n",
"Running tokenizer on train dataset: 100% 16/16 [00:00<00:00, 23.64ba/s]\n",
"Running tokenizer on validation dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-c6cebbf9290f7df0.arrow\n",
"Running tokenizer on validation dataset: 100% 2/2 [00:00<00:00, 33.01ba/s]\n",
"Running tokenizer on prediction dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-988bff0993eee389.arrow\n",
"Running tokenizer on prediction dataset: 100% 2/2 [00:00<00:00, 33.06ba/s]\n",
"[INFO|trainer.py:503] 2023-02-14 22:04:30,902 >> max_steps is given, it will override any value given in num_train_epochs\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1607] 2023-02-14 22:04:30,911 >> ***** Running training *****\n",
"[INFO|trainer.py:1608] 2023-02-14 22:04:30,911 >> Num examples = 16000\n",
"[INFO|trainer.py:1609] 2023-02-14 22:04:30,911 >> Num Epochs = 2\n",
"[INFO|trainer.py:1610] 2023-02-14 22:04:30,911 >> Instantaneous batch size per device = 8\n",
"[INFO|trainer.py:1611] 2023-02-14 22:04:30,911 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n",
"[INFO|trainer.py:1612] 2023-02-14 22:04:30,911 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1613] 2023-02-14 22:04:30,911 >> Total optimization steps = 2500\n",
" 0% 0/2500 [00:00<?, ?it/s][WARNING|logging.py:281] 2023-02-14 22:04:30,925 >> You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
"{'loss': 21.5908, 'learning_rate': 4.8e-05, 'epoch': 0.05}\n",
"{'loss': 14.8264, 'learning_rate': 4.600000000000001e-05, 'epoch': 0.1}\n",
" 10% 249/2500 [00:24<03:31, 10.64it/s][INFO|trainer.py:2907] 2023-02-14 22:04:55,366 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:04:55,366 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:04:55,366 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.87it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.90it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.84it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.48it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.16it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.99it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.61it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.67it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.63it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.67it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.49it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.42it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.46it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.50it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.57it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.68it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.70it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.71it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.65it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.71it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.73it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.58it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.50it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.56it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.58it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:10, 14.39it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:10, 14.35it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.47it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.45it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.40it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.44it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.52it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.53it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.55it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.66it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.78it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.78it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.73it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.79it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.61it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.37it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.37it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.45it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.46it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.55it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.56it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.60it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.21it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.41it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.53it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.60it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.72it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.74it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.80it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.86it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.80it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.83it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.78it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.78it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.79it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.86it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.89it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.83it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.85it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.80it/s]\u001b[A\n",
" 10% 250/2500 [00:39<03:31, 10.64it/s]\n",
" 90% 226/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.81it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.86it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.84it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.77it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
"{'eval_loss': 9.001160621643066, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2175, 'eval_samples_per_second': 116.161, 'eval_steps_per_second': 14.52, 'epoch': 0.12}\n",
"\n",
" 10% 250/2500 [00:41<03:31, 10.64it/s]\n",
"{'loss': 10.5792, 'learning_rate': 4.4000000000000006e-05, 'epoch': 0.15}\n",
"{'loss': 7.8113, 'learning_rate': 4.2e-05, 'epoch': 0.2}\n",
"{'loss': 5.2658, 'learning_rate': 4e-05, 'epoch': 0.25}\n",
" 20% 500/2500 [01:05<03:04, 10.83it/s][INFO|trainer.py:2907] 2023-02-14 22:05:35,963 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:05:35,963 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:05:35,963 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 22.27it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.12it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.18it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.53it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.20it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.89it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.91it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.77it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.79it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.68it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.68it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.63it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.74it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.76it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.84it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.68it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.79it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.74it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.77it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.80it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:09, 14.81it/s]\u001b[A\n",
" 42% 104/250 [00:06<00:09, 14.81it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.86it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.87it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.87it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.85it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.73it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.74it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.77it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.49it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.60it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.63it/s]\u001b[A\n",
" 59% 148/250 [00:09<00:06, 14.62it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.61it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.75it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.67it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.68it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.61it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.58it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.64it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.60it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.49it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.47it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.53it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.57it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.58it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.64it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.30it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.43it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.54it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:02, 14.68it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.75it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.77it/s]\u001b[A\n",
" 20% 500/2500 [01:19<03:04, 10.83it/s]\n",
" 87% 218/250 [00:14<00:02, 14.84it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.87it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.79it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.69it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.64it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.54it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.60it/s]\u001b[A\n",
" 94% 236/250 [00:15<00:00, 14.66it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.76it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.76it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
"{'eval_loss': 2.1697170734405518, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.1551, 'eval_samples_per_second': 116.584, 'eval_steps_per_second': 14.573, 'epoch': 0.25}\n",
"\n",
" 20% 500/2500 [01:22<03:04, 10.83it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:05:53,119 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:05:53,120 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:05:53,749 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:05:53,750 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:05:53,750 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:05:53,788 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-500/spiece.model\n",
"{'loss': 3.7795, 'learning_rate': 3.8e-05, 'epoch': 0.3}\n",
"{'loss': 2.9169, 'learning_rate': 3.6e-05, 'epoch': 0.35}\n",
" 30% 749/2500 [01:47<02:43, 10.71it/s][INFO|trainer.py:2907] 2023-02-14 22:06:18,135 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:06:18,136 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:06:18,136 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.21it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.54it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.62it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.04it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:16, 14.78it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.60it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.53it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:16, 14.44it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.57it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 12% 30/250 [00:02<00:15, 14.66it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.67it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.58it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.65it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.85it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.84it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.80it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.77it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:12, 14.77it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.81it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.78it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.76it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.71it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.64it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.76it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.71it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.74it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.76it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.75it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.72it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.70it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.72it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.66it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.70it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.63it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.71it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.73it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.41it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.46it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.51it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.60it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.50it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.53it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:07, 14.55it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.53it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.48it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.54it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.46it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.42it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.38it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.38it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.32it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.33it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.23it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.23it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.24it/s]\u001b[A\n",
" 70% 176/250 [00:12<00:05, 14.21it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:05, 14.17it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.16it/s]\u001b[A\n",
" 30% 750/2500 [01:59<02:43, 10.71it/s]\n",
" 74% 184/250 [00:12<00:04, 14.30it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.40it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.40it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.48it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.56it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.62it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.68it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.72it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.71it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.75it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.73it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.65it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.64it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.26it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.42it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.45it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.54it/s]\u001b[A\n",
"{'eval_loss': 1.4527522325515747, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2954, 'eval_samples_per_second': 115.638, 'eval_steps_per_second': 14.455, 'epoch': 0.38}\n",
"\n",
" 30% 750/2500 [02:04<02:43, 10.71it/s]\n",
"{'loss': 2.4516, 'learning_rate': 3.4000000000000007e-05, 'epoch': 0.4}\n",
"{'loss': 2.2293, 'learning_rate': 3.2000000000000005e-05, 'epoch': 0.45}\n",
"{'loss': 2.0123, 'learning_rate': 3e-05, 'epoch': 0.5}\n",
" 40% 1000/2500 [02:27<02:21, 10.63it/s][INFO|trainer.py:2907] 2023-02-14 22:06:58,636 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:06:58,636 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:06:58,636 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:12, 20.13it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:15, 16.26it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.45it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.09it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:16, 14.85it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.66it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.56it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.77it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.88it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.83it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:14, 14.94it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:14, 14.94it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.96it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.80it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.82it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.73it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.47it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.19it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.44it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.79it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.79it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.83it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.89it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:11, 14.88it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:11, 14.83it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.83it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.83it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.81it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.78it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.78it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.85it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.79it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.70it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.72it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.76it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.79it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.72it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.84it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.82it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.85it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.85it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.85it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.87it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.88it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.81it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.79it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.81it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.84it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.84it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.83it/s]\u001b[A\n",
" 59% 148/250 [00:09<00:06, 14.83it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.76it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.77it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.77it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.80it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.70it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.68it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.75it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.82it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.87it/s]\u001b[A\n",
" 40% 1000/2500 [02:39<02:21, 10.63it/s]\n",
" 71% 178/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.65it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.61it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.58it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.63it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.72it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.67it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.74it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.64it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.69it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.76it/s]\u001b[A\n",
" 94% 236/250 [00:15<00:00, 14.73it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.82it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.87it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.88it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.90it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.91it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.90it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.92it/s]\u001b[A\n",
"{'eval_loss': 1.160749912261963, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.1471, 'eval_samples_per_second': 116.638, 'eval_steps_per_second': 14.58, 'epoch': 0.5}\n",
"\n",
" 40% 1000/2500 [02:44<02:21, 10.63it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:07:15,784 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-1000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:07:15,785 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:07:16,414 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:07:16,415 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:07:16,416 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-1000/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:07:16,453 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-1000/spiece.model\n",
"{'loss': 1.9003, 'learning_rate': 2.8000000000000003e-05, 'epoch': 0.55}\n",
"{'loss': 1.7884, 'learning_rate': 2.6000000000000002e-05, 'epoch': 0.6}\n",
" 50% 1249/2500 [03:09<01:59, 10.49it/s][INFO|trainer.py:2907] 2023-02-14 22:07:40,879 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:07:40,879 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:07:40,879 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.99it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.06it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 16.09it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.50it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.84it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.74it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.69it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.74it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.73it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.71it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.62it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.56it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.60it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.64it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.75it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.78it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.73it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.68it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.63it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.74it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.73it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.68it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.64it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.68it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.29it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.38it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:12, 14.47it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.72it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.75it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.64it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.67it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.71it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.71it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.84it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:09, 14.50it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.51it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.63it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.69it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.67it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.62it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.60it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.64it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.58it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.55it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.58it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.57it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.59it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.66it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.53it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.60it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.52it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.50it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.49it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.47it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.37it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:05, 14.29it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.27it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.25it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.27it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.24it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.18it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.22it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:04, 14.16it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.21it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.22it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.27it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.28it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.16it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.06it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.05it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.06it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.06it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 13.87it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.01it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.22it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.28it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.42it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.39it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.35it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.49it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.57it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.65it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 94% 234/250 [00:16<00:01, 14.73it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.74it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.80it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.79it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.83it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.81it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.72it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.63it/s]\u001b[A\n",
"{'eval_loss': 1.0410572290420532, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.3319, 'eval_samples_per_second': 115.394, 'eval_steps_per_second': 14.424, 'epoch': 0.62}\n",
"\n",
" 50% 1250/2500 [03:27<01:59, 10.49it/s]\n",
"{'loss': 1.7415, 'learning_rate': 2.4e-05, 'epoch': 0.65}\n",
"{'loss': 1.6231, 'learning_rate': 2.2000000000000003e-05, 'epoch': 0.7}\n",
"{'loss': 1.5278, 'learning_rate': 2e-05, 'epoch': 0.75}\n",
" 60% 1500/2500 [03:50<01:33, 10.71it/s][INFO|trainer.py:2907] 2023-02-14 22:08:21,432 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:08:21,433 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:08:21,433 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.79it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.88it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.94it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.36it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 14.98it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:16, 14.72it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:16, 14.47it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:16, 14.40it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.48it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.54it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.57it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.56it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.52it/s]\u001b[A\n",
" 12% 30/250 [00:02<00:15, 14.53it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:15, 14.51it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.51it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.51it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.58it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.66it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.75it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.69it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.72it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.72it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.80it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:11, 14.74it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.77it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.59it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.69it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.80it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.82it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.80it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.81it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.81it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.74it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.73it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 47% 118/250 [00:07<00:08, 14.80it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.79it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.76it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.82it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.82it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.72it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.73it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.78it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.74it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.79it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.78it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.85it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.82it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.85it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.89it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.85it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.56it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.78it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.85it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.87it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.91it/s]\u001b[A\n",
" 77% 192/250 [00:12<00:03, 14.91it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.54it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.59it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.63it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.50it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.65it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.49it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.66it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.57it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.56it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.58it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.56it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.55it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.49it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.42it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.39it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.40it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.35it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.37it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.43it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.44it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.44it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.48it/s]\u001b[A\n",
"{'eval_loss': 0.9458380341529846, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.379, 'eval_samples_per_second': 115.081, 'eval_steps_per_second': 14.385, 'epoch': 0.75}\n",
"\n",
" 60% 1500/2500 [04:07<01:33, 10.71it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:08:38,813 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-1500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:08:38,814 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:08:39,285 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:08:39,286 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:08:39,286 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-1500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:08:39,322 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-1500/spiece.model\n",
"{'loss': 1.4835, 'learning_rate': 1.8e-05, 'epoch': 0.8}\n",
"{'loss': 1.449, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.85}\n",
" 70% 1749/2500 [04:32<01:10, 10.61it/s][INFO|trainer.py:2907] 2023-02-14 22:09:03,363 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:09:03,363 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:09:03,363 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 22.10it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.10it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.16it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.48it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.17it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.90it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.59it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.58it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.59it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.60it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:15, 14.49it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.52it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.45it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.39it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.44it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.41it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.46it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.46it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.47it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.51it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.65it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:12, 14.69it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.75it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.58it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.53it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.61it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.64it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.75it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:10, 14.75it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.78it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.83it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.73it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.68it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.74it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.77it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.83it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.76it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.80it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.68it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:08, 14.68it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.60it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.58it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.63it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.64it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.67it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.66it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.74it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.71it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.66it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.66it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.52it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.55it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:06, 14.62it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.65it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.62it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.71it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.54it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.61it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.66it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.62it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.81it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.77it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.79it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.50it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.42it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.48it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.52it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.56it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.53it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.58it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.61it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.69it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.75it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.71it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.72it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.71it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.59it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.57it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.61it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.67it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.66it/s]\u001b[A\n",
"{'eval_loss': 0.8559792637825012, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2321, 'eval_samples_per_second': 116.063, 'eval_steps_per_second': 14.508, 'epoch': 0.88}\n",
"\n",
" 70% 1750/2500 [04:49<01:10, 10.61it/s]\n",
"{'loss': 1.4421, 'learning_rate': 1.4000000000000001e-05, 'epoch': 0.9}\n",
"{'loss': 1.3835, 'learning_rate': 1.2e-05, 'epoch': 0.95}\n",
"{'loss': 1.325, 'learning_rate': 1e-05, 'epoch': 1.0}\n",
" 80% 2000/2500 [05:12<00:45, 10.89it/s][INFO|trainer.py:2907] 2023-02-14 22:09:43,863 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:09:43,863 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:09:43,863 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.99it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 17.18it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:14, 16.14it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.55it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.22it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 15.01it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.86it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.84it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.87it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.46it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.51it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.50it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.65it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.72it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.71it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.52it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.62it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.60it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.56it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.40it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.43it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.46it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.51it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.50it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.44it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.49it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.50it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.52it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.53it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:12, 14.50it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:12, 14.33it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.36it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.41it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.37it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.42it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.55it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.57it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.63it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.64it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.57it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.60it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.63it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.57it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.68it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.65it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.52it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.52it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.51it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.61it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.70it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.75it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.72it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:08, 14.71it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.76it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.77it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.81it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.87it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.90it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.70it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.73it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.82it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.86it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.87it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.91it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.53it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.56it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.57it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.63it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.75it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.69it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.65it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.72it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.70it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.61it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.59it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.53it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.63it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.68it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.67it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.73it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.76it/s]\u001b[A\n",
" 88% 220/250 [00:14<00:02, 14.74it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.75it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.70it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.60it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.47it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.53it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.60it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.61it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.66it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
" 80% 2000/2500 [05:29<00:45, 10.89it/s]\n",
" 99% 248/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.62it/s]\u001b[A\n",
"{'eval_loss': 0.8163257241249084, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2395, 'eval_samples_per_second': 116.013, 'eval_steps_per_second': 14.502, 'epoch': 1.0}\n",
"\n",
" 80% 2000/2500 [05:30<00:45, 10.89it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:10:01,104 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-2000\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:10:01,105 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:10:01,585 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:10:01,586 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:10:01,586 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-2000/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:10:01,623 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-2000/spiece.model\n",
"{'loss': 1.2708, 'learning_rate': 8.000000000000001e-06, 'epoch': 1.05}\n",
"{'loss': 1.3351, 'learning_rate': 6e-06, 'epoch': 1.1}\n",
" 90% 2249/2500 [05:54<00:23, 10.80it/s][INFO|trainer.py:2907] 2023-02-14 22:10:25,736 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:10:25,736 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:10:25,736 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.89it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.90it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 16.04it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.53it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.20it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.99it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.93it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.90it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.70it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.80it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.78it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:15, 14.64it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.61it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.56it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.69it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.72it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:14, 14.53it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.45it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.54it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.55it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.57it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.53it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.45it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.50it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.57it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.41it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.43it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.54it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.54it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.48it/s]\u001b[A\n",
" 30% 74/250 [00:05<00:12, 14.39it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.52it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.50it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.49it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.54it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.62it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.63it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.59it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.69it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.60it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.63it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.66it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.65it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:09, 14.69it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:09, 14.67it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.75it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.77it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.76it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.78it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.82it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:08, 14.79it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.80it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.83it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.81it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.78it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.77it/s]\u001b[A\n",
" 53% 132/250 [00:08<00:07, 14.80it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.70it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.61it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.75it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.72it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.69it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.65it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.62it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.60it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.64it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.63it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.71it/s]\u001b[A\n",
" 65% 162/250 [00:10<00:05, 14.69it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.77it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.78it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.79it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.73it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.73it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.79it/s]\u001b[A\n",
" 70% 176/250 [00:11<00:05, 14.80it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.70it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.66it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.67it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.76it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.73it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.79it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.72it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.68it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.74it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 82% 206/250 [00:13<00:03, 14.65it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.31it/s]\u001b[A\n",
" 84% 210/250 [00:14<00:02, 14.45it/s]\u001b[A\n",
" 85% 212/250 [00:14<00:02, 14.61it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.60it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.70it/s]\u001b[A\n",
" 90% 2250/2500 [06:09<00:23, 10.80it/s]\n",
" 88% 220/250 [00:14<00:02, 14.71it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.67it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.73it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.77it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.84it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.80it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.78it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.63it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.66it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.68it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.62it/s]\u001b[A\n",
"100% 250/250 [00:16<00:00, 14.59it/s]\u001b[A\n",
"{'eval_loss': 0.8037287592887878, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2062, 'eval_samples_per_second': 116.237, 'eval_steps_per_second': 14.53, 'epoch': 1.12}\n",
"\n",
" 90% 2250/2500 [06:12<00:23, 10.80it/s]\n",
"{'loss': 1.2308, 'learning_rate': 4.000000000000001e-06, 'epoch': 1.15}\n",
"{'loss': 1.376, 'learning_rate': 2.0000000000000003e-06, 'epoch': 1.2}\n",
"{'loss': 1.2416, 'learning_rate': 0.0, 'epoch': 1.25}\n",
"100% 2500/2500 [06:35<00:00, 10.84it/s][INFO|trainer.py:2907] 2023-02-14 22:11:06,282 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:06,283 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:06,283 >> Batch size = 8\n",
"\n",
" 0% 0/250 [00:00<?, ?it/s]\u001b[A\n",
" 1% 3/250 [00:00<00:11, 21.34it/s]\u001b[A\n",
" 2% 6/250 [00:00<00:14, 16.78it/s]\u001b[A\n",
" 3% 8/250 [00:00<00:15, 15.85it/s]\u001b[A\n",
" 4% 10/250 [00:00<00:15, 15.37it/s]\u001b[A\n",
" 5% 12/250 [00:00<00:15, 15.00it/s]\u001b[A\n",
" 6% 14/250 [00:00<00:15, 14.91it/s]\u001b[A\n",
" 6% 16/250 [00:01<00:15, 14.80it/s]\u001b[A\n",
" 7% 18/250 [00:01<00:15, 14.76it/s]\u001b[A\n",
" 8% 20/250 [00:01<00:15, 14.78it/s]\u001b[A\n",
" 9% 22/250 [00:01<00:15, 14.67it/s]\u001b[A\n",
" 10% 24/250 [00:01<00:15, 14.60it/s]\u001b[A\n",
" 10% 26/250 [00:01<00:15, 14.65it/s]\u001b[A\n",
" 11% 28/250 [00:01<00:15, 14.63it/s]\u001b[A\n",
" 12% 30/250 [00:01<00:14, 14.67it/s]\u001b[A\n",
" 13% 32/250 [00:02<00:14, 14.64it/s]\u001b[A\n",
" 14% 34/250 [00:02<00:14, 14.68it/s]\u001b[A\n",
" 14% 36/250 [00:02<00:14, 14.62it/s]\u001b[A\n",
" 15% 38/250 [00:02<00:14, 14.53it/s]\u001b[A\n",
" 16% 40/250 [00:02<00:14, 14.59it/s]\u001b[A\n",
" 17% 42/250 [00:02<00:14, 14.63it/s]\u001b[A\n",
" 18% 44/250 [00:02<00:14, 14.57it/s]\u001b[A\n",
" 18% 46/250 [00:03<00:13, 14.67it/s]\u001b[A\n",
" 19% 48/250 [00:03<00:13, 14.73it/s]\u001b[A\n",
" 20% 50/250 [00:03<00:13, 14.82it/s]\u001b[A\n",
" 21% 52/250 [00:03<00:13, 14.79it/s]\u001b[A\n",
" 22% 54/250 [00:03<00:13, 14.71it/s]\u001b[A\n",
" 22% 56/250 [00:03<00:13, 14.70it/s]\u001b[A\n",
" 23% 58/250 [00:03<00:13, 14.59it/s]\u001b[A\n",
" 24% 60/250 [00:04<00:13, 14.53it/s]\u001b[A\n",
" 25% 62/250 [00:04<00:12, 14.46it/s]\u001b[A\n",
" 26% 64/250 [00:04<00:12, 14.47it/s]\u001b[A\n",
" 26% 66/250 [00:04<00:12, 14.48it/s]\u001b[A\n",
" 27% 68/250 [00:04<00:12, 14.65it/s]\u001b[A\n",
" 28% 70/250 [00:04<00:12, 14.77it/s]\u001b[A\n",
" 29% 72/250 [00:04<00:12, 14.74it/s]\u001b[A\n",
" 30% 74/250 [00:04<00:12, 14.66it/s]\u001b[A\n",
" 30% 76/250 [00:05<00:11, 14.67it/s]\u001b[A\n",
" 31% 78/250 [00:05<00:11, 14.68it/s]\u001b[A\n",
" 32% 80/250 [00:05<00:11, 14.70it/s]\u001b[A\n",
" 33% 82/250 [00:05<00:11, 14.66it/s]\u001b[A\n",
" 34% 84/250 [00:05<00:11, 14.61it/s]\u001b[A\n",
" 34% 86/250 [00:05<00:11, 14.62it/s]\u001b[A\n",
" 35% 88/250 [00:05<00:11, 14.56it/s]\u001b[A\n",
" 36% 90/250 [00:06<00:10, 14.59it/s]\u001b[A\n",
" 37% 92/250 [00:06<00:10, 14.51it/s]\u001b[A\n",
" 38% 94/250 [00:06<00:10, 14.38it/s]\u001b[A\n",
" 38% 96/250 [00:06<00:10, 14.33it/s]\u001b[A\n",
" 39% 98/250 [00:06<00:10, 14.30it/s]\u001b[A\n",
" 40% 100/250 [00:06<00:10, 14.35it/s]\u001b[A\n",
" 41% 102/250 [00:06<00:10, 14.40it/s]\u001b[A\n",
" 42% 104/250 [00:07<00:10, 14.40it/s]\u001b[A\n",
" 42% 106/250 [00:07<00:10, 14.36it/s]\u001b[A\n",
" 43% 108/250 [00:07<00:09, 14.27it/s]\u001b[A\n",
" 44% 110/250 [00:07<00:09, 14.36it/s]\u001b[A\n",
" 45% 112/250 [00:07<00:09, 14.34it/s]\u001b[A\n",
" 46% 114/250 [00:07<00:09, 14.33it/s]\u001b[A\n",
" 46% 116/250 [00:07<00:09, 14.31it/s]\u001b[A\n",
" 47% 118/250 [00:08<00:09, 14.35it/s]\u001b[A\n",
" 48% 120/250 [00:08<00:09, 14.41it/s]\u001b[A\n",
" 49% 122/250 [00:08<00:08, 14.47it/s]\u001b[A\n",
" 50% 124/250 [00:08<00:08, 14.50it/s]\u001b[A\n",
" 50% 126/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 51% 128/250 [00:08<00:08, 14.56it/s]\u001b[A\n",
" 52% 130/250 [00:08<00:08, 14.59it/s]\u001b[A\n",
" 53% 132/250 [00:09<00:08, 14.59it/s]\u001b[A\n",
" 54% 134/250 [00:09<00:07, 14.67it/s]\u001b[A\n",
" 54% 136/250 [00:09<00:07, 14.62it/s]\u001b[A\n",
" 55% 138/250 [00:09<00:07, 14.57it/s]\u001b[A\n",
" 56% 140/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 57% 142/250 [00:09<00:07, 14.69it/s]\u001b[A\n",
" 58% 144/250 [00:09<00:07, 14.76it/s]\u001b[A\n",
" 58% 146/250 [00:09<00:07, 14.65it/s]\u001b[A\n",
" 59% 148/250 [00:10<00:06, 14.67it/s]\u001b[A\n",
" 60% 150/250 [00:10<00:06, 14.75it/s]\u001b[A\n",
" 61% 152/250 [00:10<00:06, 14.59it/s]\u001b[A\n",
" 62% 154/250 [00:10<00:06, 14.68it/s]\u001b[A\n",
" 62% 156/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 63% 158/250 [00:10<00:06, 14.66it/s]\u001b[A\n",
" 64% 160/250 [00:10<00:06, 14.72it/s]\u001b[A\n",
" 65% 162/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 66% 164/250 [00:11<00:05, 14.69it/s]\u001b[A\n",
" 66% 166/250 [00:11<00:05, 14.70it/s]\u001b[A\n",
" 67% 168/250 [00:11<00:05, 14.67it/s]\u001b[A\n",
" 68% 170/250 [00:11<00:05, 14.65it/s]\u001b[A\n",
" 69% 172/250 [00:11<00:05, 14.71it/s]\u001b[A\n",
" 70% 174/250 [00:11<00:05, 14.72it/s]\u001b[A\n",
" 70% 176/250 [00:12<00:05, 14.71it/s]\u001b[A\n",
" 71% 178/250 [00:12<00:04, 14.68it/s]\u001b[A\n",
" 72% 180/250 [00:12<00:04, 14.56it/s]\u001b[A\n",
" 73% 182/250 [00:12<00:04, 14.55it/s]\u001b[A\n",
" 74% 184/250 [00:12<00:04, 14.62it/s]\u001b[A\n",
" 74% 186/250 [00:12<00:04, 14.63it/s]\u001b[A\n",
" 75% 188/250 [00:12<00:04, 14.64it/s]\u001b[A\n",
" 76% 190/250 [00:12<00:04, 14.71it/s]\u001b[A\n",
" 77% 192/250 [00:13<00:03, 14.64it/s]\u001b[A\n",
" 78% 194/250 [00:13<00:03, 14.71it/s]\u001b[A\n",
" 78% 196/250 [00:13<00:03, 14.66it/s]\u001b[A\n",
" 79% 198/250 [00:13<00:03, 14.67it/s]\u001b[A\n",
" 80% 200/250 [00:13<00:03, 14.73it/s]\u001b[A\n",
" 81% 202/250 [00:13<00:03, 14.69it/s]\u001b[A\n",
" 82% 204/250 [00:13<00:03, 14.60it/s]\u001b[A\n",
" 82% 206/250 [00:14<00:03, 14.59it/s]\u001b[A\n",
" 83% 208/250 [00:14<00:02, 14.49it/s]\u001b[A\n",
"100% 2500/2500 [06:49<00:00, 10.84it/s]\n",
" 85% 212/250 [00:14<00:02, 14.53it/s]\u001b[A\n",
" 86% 214/250 [00:14<00:02, 14.51it/s]\u001b[A\n",
" 86% 216/250 [00:14<00:02, 14.54it/s]\u001b[A\n",
" 87% 218/250 [00:14<00:02, 14.56it/s]\u001b[A\n",
" 88% 220/250 [00:15<00:02, 14.67it/s]\u001b[A\n",
" 89% 222/250 [00:15<00:01, 14.66it/s]\u001b[A\n",
" 90% 224/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 90% 226/250 [00:15<00:01, 14.68it/s]\u001b[A\n",
" 91% 228/250 [00:15<00:01, 14.78it/s]\u001b[A\n",
" 92% 230/250 [00:15<00:01, 14.83it/s]\u001b[A\n",
" 93% 232/250 [00:15<00:01, 14.82it/s]\u001b[A\n",
" 94% 234/250 [00:15<00:01, 14.74it/s]\u001b[A\n",
" 94% 236/250 [00:16<00:00, 14.72it/s]\u001b[A\n",
" 95% 238/250 [00:16<00:00, 14.71it/s]\u001b[A\n",
" 96% 240/250 [00:16<00:00, 14.70it/s]\u001b[A\n",
" 97% 242/250 [00:16<00:00, 14.73it/s]\u001b[A\n",
" 98% 244/250 [00:16<00:00, 14.74it/s]\u001b[A\n",
" 98% 246/250 [00:16<00:00, 14.65it/s]\u001b[A\n",
" 99% 248/250 [00:16<00:00, 14.69it/s]\u001b[A\n",
"100% 250/250 [00:17<00:00, 14.64it/s]\u001b[A\n",
"{'eval_loss': 0.7921838760375977, 'eval_bleu': 0.0, 'eval_accuracy': 1.0, 'eval_gen_len': 2.0, 'eval_runtime': 17.2721, 'eval_samples_per_second': 115.794, 'eval_steps_per_second': 14.474, 'epoch': 1.25}\n",
"\n",
"100% 2500/2500 [06:52<00:00, 10.84it/s]\n",
" \u001b[A[INFO|trainer.py:2656] 2023-02-14 22:11:23,556 >> Saving model checkpoint to out/emotion/t5_v1_1/checkpoint-2500\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:11:23,557 >> Configuration saved in out/emotion/t5_v1_1/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:11:24,033 >> Model weights saved in out/emotion/t5_v1_1/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:11:24,034 >> tokenizer config file saved in out/emotion/t5_v1_1/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:11:24,034 >> Special tokens file saved in out/emotion/t5_v1_1/checkpoint-2500/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:11:24,070 >> Copy vocab file to out/emotion/t5_v1_1/checkpoint-2500/spiece.model\n",
"[INFO|trainer.py:1852] 2023-02-14 22:11:24,853 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"[INFO|trainer.py:1946] 2023-02-14 22:11:24,854 >> Loading best model from out/emotion/t5_v1_1/checkpoint-500 (score: 1.0).\n",
"{'train_runtime': 414.2608, 'train_samples_per_second': 48.279, 'train_steps_per_second': 6.035, 'train_loss': 3.8232721221923827, 'epoch': 1.25}\n",
"100% 2500/2500 [06:54<00:00, 6.03it/s]\n",
"[INFO|trainer.py:2656] 2023-02-14 22:11:25,173 >> Saving model checkpoint to out/emotion/t5_v1_1\n",
"[INFO|configuration_utils.py:447] 2023-02-14 22:11:25,174 >> Configuration saved in out/emotion/t5_v1_1/config.json\n",
"[INFO|modeling_utils.py:1624] 2023-02-14 22:11:25,662 >> Model weights saved in out/emotion/t5_v1_1/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2123] 2023-02-14 22:11:25,663 >> tokenizer config file saved in out/emotion/t5_v1_1/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2130] 2023-02-14 22:11:25,663 >> Special tokens file saved in out/emotion/t5_v1_1/special_tokens_map.json\n",
"[INFO|tokenization_t5_fast.py:187] 2023-02-14 22:11:25,703 >> Copy vocab file to out/emotion/t5_v1_1/spiece.model\n",
"***** train metrics *****\n",
" epoch = 1.25\n",
" train_loss = 3.8233\n",
" train_runtime = 0:06:54.26\n",
" train_samples = 16000\n",
" train_samples_per_second = 48.279\n",
" train_steps_per_second = 6.035\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:2907] 2023-02-14 22:11:25,713 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:25,713 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:25,713 >> Batch size = 8\n",
"100% 250/250 [00:17<00:00, 14.50it/s]\n",
"***** eval metrics *****\n",
" epoch = 1.25\n",
" eval_accuracy = 1.0\n",
" eval_bleu = 0.0\n",
" eval_gen_len = 2.0\n",
" eval_loss = 2.1697\n",
" eval_runtime = 0:00:17.31\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 115.494\n",
" eval_steps_per_second = 14.437\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:2907] 2023-02-14 22:11:43,033 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2909] 2023-02-14 22:11:43,033 >> Num examples = 2000\n",
"[INFO|trainer.py:2912] 2023-02-14 22:11:43,034 >> Batch size = 8\n",
"100% 250/250 [00:17<00:00, 14.58it/s]\n",
"***** predict metrics *****\n",
" predict_accuracy = 1.0\n",
" predict_bleu = 0.0\n",
" predict_gen_len = 2.0\n",
" predict_loss = 2.1029\n",
" predict_runtime = 0:00:17.21\n",
" predict_samples = 2000\n",
" predict_samples_per_second = 116.158\n",
" predict_steps_per_second = 14.52\n",
"[INFO|modelcard.py:444] 2023-02-14 22:12:00,417 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Translation', 'type': 'translation'}, 'metrics': [{'name': 'Bleu', 'type': 'bleu', 'value': 0.0}, {'name': 'Accuracy', 'type': 'accuracy', 'value': 1.0}]}\n"
]
}
],
"source": [
"!python run_translation.py \\\n",
" --cache_dir t5_cache_training \\\n",
" --model_name_or_path \"google/t5-v1_1-small\" \\\n",
" --train_file data/s2s-train.json \\\n",
" --validation_file data/s2s-valid.json \\\n",
" --test_file data/s2s-test.json \\\n",
" --per_device_train_batch_size 8 \\\n",
" --per_device_eval_batch_size 8 \\\n",
" --source_lang \"text\" \\\n",
" --target_lang \"label\" \\\n",
" --source_prefix \"emotion classification\" \\\n",
" --max_source_length 256 \\\n",
" --max_target_length 128 \\\n",
" --generation_max_length 128 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --predict_with_generate \\\n",
" --num_train_epochs 1 \\\n",
" --output_dir out/emotion/t5_v1_1 \\\n",
" --overwrite_output_dir \\\n",
" --eval_steps 250 \\\n",
" --evaluation_strategy steps \\\n",
" --metric_for_best_model accuracy \\\n",
" --logging_steps 100 \\\n",
" --save_total_limit 5 \\\n",
" --max_steps 2500 \\\n",
" --load_best_model_at_end True "
]
},
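{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal inference sketch (added for illustration, not executed in the original run): it assumes the fine-tuned checkpoint written above to `out/emotion/t5_v1_1` and reuses the `emotion classification` source prefix passed to `run_translation.py`, so the model is expected to generate the label text for a given input sentence. The example sentence and variable names are arbitrary."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: load the checkpoint trained above and classify a single example.\n",
"# Assumes out/emotion/t5_v1_1 exists locally (it is written by the training cell).\n",
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
"\n",
"ckpt = 'out/emotion/t5_v1_1'\n",
"t5_tokenizer = AutoTokenizer.from_pretrained(ckpt)\n",
"t5_model = AutoModelForSeq2SeqLM.from_pretrained(ckpt)\n",
"\n",
"sample_text = 'i feel a little mellow today'  # arbitrary example sentence\n",
"# run_translation.py prepends --source_prefix to every source text,\n",
"# so the same prefix is prepended here as well\n",
"model_inputs = t5_tokenizer('emotion classification' + sample_text, return_tensors='pt')\n",
"generated = t5_model.generate(**model_inputs, max_new_tokens=4)\n",
"print(t5_tokenizer.decode(generated[0], skip_special_tokens=True))"
]
},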
{
"cell_type": "markdown",
"metadata": {
"id": "XyC_7Ov07ICm"
},
"source": [
"# **FLAN T5**"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"id": "nX6LOzsF7ICm"
},
"outputs": [],
"source": [
"from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM\n",
"import json"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "EEuIugWA7ICm"
},
"outputs": [],
"source": [
"if torch.cuda.is_available():\n",
" device = 0\n",
"else:\n",
" device = -1"
]
},
{
"cell_type": "code",
"source": [
"def perform_shot_learning(pipeline_type, model_name, test_file):\n",
" class_type = AutoModelForSeq2SeqLM\n",
" model = class_type.from_pretrained(model_name, torch_dtype=torch.float32)\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"\n",
" our_pipeline = pipeline(pipeline_type, model=model, tokenizer=tokenizer, device=device)\n",
"\n",
" correct = 0\n",
"\n",
" labels = \"possible labels: sadness, joy, love, anger, fear, surprise\"\n",
"\n",
" with open(test_file) as f:\n",
" f_lines = f.readlines()\n",
" for line in f_lines:\n",
" ex = json.loads(line)\n",
" prompt = ex['text']\n",
"\n",
" tmp = labels + '\\n' + f'text: {prompt}' + '\\n' + 'label: '\n",
" \n",
" predict = our_pipeline(tmp, do_sample=False)[0]['generated_text']\n",
"\n",
" if predict == ex['label']:\n",
" correct += 1\n",
"\n",
" print(f'Accuracy: {correct/len(f_lines)}')"
],
"metadata": {
"id": "AtDz85GKalzg"
},
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"source": [
"test_ds = 'data/s2s-test.json'"
],
"metadata": {
"id": "q9-4fzxpaoff"
},
"execution_count": 16,
"outputs": []
},
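{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before the full zero-shot run, the cell below is a small illustrative sketch (added here, not part of the original run) that prints the prompt `perform_shot_learning` would build for the first test example, using the same label list and template."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative only: show what a single zero-shot prompt looks like.\n",
"# Uses the same label list and template as perform_shot_learning above.\n",
"import json\n",
"\n",
"labels = 'possible labels: sadness, joy, love, anger, fear, surprise'\n",
"with open(test_ds) as f:\n",
"    first_example = json.loads(f.readline())\n",
"\n",
"prompt_preview = labels + '\\n' + 'text: ' + first_example['text'] + '\\n' + 'label: '\n",
"print(prompt_preview)"
]
},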
{
"cell_type": "code",
"source": [
"perform_shot_learning('text2text-generation', 'google/flan-t5-large', test_ds)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 219,
"referenced_widgets": [
"18f03144f5194bd2a88064eaae1140f0",
"36b8333766d44ee2aaa8da8ee75975d2",
"f7a9b125cf1346468e428abd689ff800",
"9b9e6161874f41c98d5c5e55d8d4fc86",
"9925a6f17ba14eee96332f0ea1dc88e5",
"dce64adfb8334591a8ce182918ecb4e3",
"9efd8cd2208245aca3f369f0735e2ee1",
"3d05704ffb0040c8b5bfb5c068c3329b",
"9564dcdd10c64072bb09e70def311ff3",
"f406c9b52a274068bd636554558497b2",
"d97be50f8cc64f8680a6cce112863255",
"7d6b88e56dad4dcbb0f1b1720f1ff118",
"eabc78cbdeef40feb36cf90fdbcdfbc7",
"6477d99dffbc4cf39e2c6998f71e37f7",
"d63511a8852942309cabe53720939fcc",
"3096b59f64eb48659a8eedea5a171be4",
"acc58b06f3b54801b10ee872fab39e6e",
"c2bd9c9ddab848529e52adfdc7634044",
"8d7e8c29d7e247f1b55d329d40508526",
"457b70adcab0464c9f990b13f433c635",
"0858fe327ec549b488f6169de1d84654",
"e18a505153c7491f8900142fb1189cd7",
"945026e5e11448b39ab37fb2a0bd963c",
"8c3aa97d58cb4f21b59af6253c952859",
"848ff807a83c4a79a1b3d7d80c29499c",
"a7b1f6722fcd4e90811041b24df0fe7b",
"f815d05091814c39a467cd8f528db504",
"915449ab41d848d39d801b4feb932a4f",
"2937b015455647abb7a524f858a881d2",
"c2b6cda9a8e94f7e97d7fb032b8e2bc5",
"af885a022ad743098e5037e1c8dc760a",
"088ec36aff7f415abfc4fd926fa0f902",
"b1b99d863dc64208afc11416d4936c2c",
"cb9e02be7ec44f6bb6b8771691c114e4",
"f68a247bddf9484e9f7b1666802f4612",
"d8d89ac972084304bff515a16e009452",
"3495b00846ae49acbb0cf3e15edf361e",
"60f6f23e78ce4ee2abf7389ab936c3ac",
"9d428e02c4134510baf179ce9137d90c",
"5298f4cd4e2e404ea66d70c62bcfe439",
"cd9fdc3eb94a4d00b5af6115318dcf45",
"d664c674a977456cad109347c0206d0e",
"17e5dedc0aeb4a1da32113e51158fd74",
"9b70ec9f110f4080a6a26fd12044fe94"
]
},
"id": "7fWzF9PVatgL",
"outputId": "6c37c046-a14c-4cab-e285-fa1ddfeb3241"
},
"execution_count": 17,
"outputs": [
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)okenizer_config.json: 0%| | 0.00/2.54k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "18f03144f5194bd2a88064eaae1140f0"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)\"spiece.model\";: 0%| | 0.00/792k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "7d6b88e56dad4dcbb0f1b1720f1ff118"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)/main/tokenizer.json: 0%| | 0.00/2.42M [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "945026e5e11448b39ab37fb2a0bd963c"
}
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"Downloading (…)cial_tokens_map.json: 0%| | 0.00/2.20k [00:00<?, ?B/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "cb9e02be7ec44f6bb6b8771691c114e4"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.8/dist-packages/transformers/pipelines/base.py:1043: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
" warnings.warn(\n"
]
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy: 0.647\n"
]
}
]
},
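{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `UserWarning` above points out that calling the pipeline one example at a time on a GPU is inefficient. The cell below is a hedged sketch (added for illustration, not part of the original run) of the same evaluation with all prompts passed to the pipeline as a list together with a `batch_size`; the exact output nesting can differ between `transformers` versions, so it is handled defensively."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: batched zero-shot evaluation to avoid one pipeline call per example.\n",
"# Assumes the same prompt template and s2s JSON-lines format as perform_shot_learning.\n",
"import json\n",
"\n",
"def perform_shot_learning_batched(pipeline_type, model_name, test_file, batch_size=16):\n",
"    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.float32)\n",
"    tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"    our_pipeline = pipeline(pipeline_type, model=model, tokenizer=tokenizer, device=device)\n",
"\n",
"    labels = 'possible labels: sadness, joy, love, anger, fear, surprise'\n",
"    with open(test_file) as f:\n",
"        examples = [json.loads(line) for line in f]\n",
"\n",
"    prompts = [labels + '\\n' + 'text: ' + ex['text'] + '\\n' + 'label: ' for ex in examples]\n",
"\n",
"    # Passing a list lets the pipeline batch the forward passes on the GPU\n",
"    outputs = our_pipeline(prompts, batch_size=batch_size, do_sample=False)\n",
"\n",
"    def generated_text(out):\n",
"        # Each item is a dict or a one-element list, depending on the transformers version\n",
"        return out['generated_text'] if isinstance(out, dict) else out[0]['generated_text']\n",
"\n",
"    correct = sum(generated_text(o) == ex['label'] for o, ex in zip(outputs, examples))\n",
"    print(f'Accuracy: {correct / len(examples)}')\n",
"\n",
"# Example call (reloads the model):\n",
"# perform_shot_learning_batched('text2text-generation', 'google/flan-t5-large', test_ds)"
]
},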
{
"cell_type": "code",
"source": [
"!zip -r /content/projekt.zip /content/"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "mJfe_hnJ_qVC",
"outputId": "ebdda236-1053-4b29-809d-7be9247edf19"
},
"execution_count": 18,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" adding: content/ (stored 0%)\n",
" adding: content/.config/ (stored 0%)\n",
" adding: content/.config/config_sentinel (stored 0%)\n",
" adding: content/.config/logs/ (stored 0%)\n",
" adding: content/.config/logs/2023.02.10/ (stored 0%)\n",
" adding: content/.config/logs/2023.02.10/14.32.38.026074.log (deflated 58%)\n",
" adding: content/.config/logs/2023.02.10/14.33.38.691407.log (deflated 56%)\n",
" adding: content/.config/logs/2023.02.10/14.33.11.427170.log (deflated 58%)\n",
" adding: content/.config/logs/2023.02.10/14.33.37.863925.log (deflated 57%)\n",
" adding: content/.config/logs/2023.02.10/14.32.12.281772.log (deflated 91%)\n",
" adding: content/.config/logs/2023.02.10/14.33.03.230973.log (deflated 86%)\n",
" adding: content/.config/gce (stored 0%)\n",
" adding: content/.config/.last_survey_prompt.yaml (stored 0%)\n",
" adding: content/.config/configurations/ (stored 0%)\n",
" adding: content/.config/configurations/config_default (deflated 15%)\n",
" adding: content/.config/active_config (stored 0%)\n",
" adding: content/.config/.last_update_check.json (deflated 22%)\n",
" adding: content/.config/.last_opt_in_prompt.yaml (stored 0%)\n",
" adding: content/__pycache__/ (stored 0%)\n",
" adding: content/__pycache__/roberta.cpython-38.pyc (deflated 62%)\n",
" adding: content/__pycache__/gpt2.cpython-38.pyc (deflated 53%)\n",
" adding: content/data/ (stored 0%)\n",
" adding: content/data/.ipynb_checkpoints/ (stored 0%)\n",
" adding: content/data/test.json (deflated 69%)\n",
" adding: content/data/s2s-test.json (deflated 70%)\n",
" adding: content/data/s2s-valid.json (deflated 70%)\n",
" adding: content/data/valid.json (deflated 69%)\n",
" adding: content/data/s2s-train.json (deflated 70%)\n",
" adding: content/data/train.json (deflated 69%)\n",
" adding: content/req.txt (deflated 30%)\n",
" adding: content/.cache_training_roberta/ (stored 0%)\n",
" adding: content/.cache_training_roberta/.cache_training_roberta_json_default-1808ac39383e9432_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/json/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-ff4234a2fb1a9582.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-6bbf8957e5f0cf7b.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-5efe26f1bca5cac0.arrow (deflated 88%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/.cache_training_roberta/json/default-1808ac39383e9432/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21 (deflated 63%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311 (deflated 59%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957 (deflated 47%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/blobs/278b7a95739c4392fae9b818bb5343dde20be1b89318f37a6d939e1e1b9e461b (deflated 41%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/refs/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/refs/main (deflated 3%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/tokenizer_config.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/added_tokens.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/special_tokens_map.json (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json (deflated 47%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json (deflated 59%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt (deflated 53%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json (deflated 63%)\n",
" adding: content/.cache_training_roberta/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin (deflated 41%)\n",
" adding: content/cache_training_t5/ (stored 0%)\n",
" adding: content/cache_training_t5/cache_training_t5_json_default-25a5883a4a222bad_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/4e28ff6ebdf584f5372d9de68867399142435d9a (deflated 48%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/b114c318caf72f6e89ea92e0755c41327a453198 (deflated 82%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/07b81619b82546ab7f30e06c9615c7fca8fe3abd (deflated 44%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/881bdbffc06e471924ecea57f962bc5f8e2a9f21 (deflated 83%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/blobs/7c9a3e998a8c74b52484f3a1ccfdcc9767972ee6b34ae7a527cdf6f972a34163 (deflated 53%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/refs/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/refs/main (deflated 5%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer.json (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/added_tokens.json (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json (deflated 44%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json (deflated 82%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model (deflated 48%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json (deflated 83%)\n",
" adding: content/cache_training_t5/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin (deflated 53%)\n",
" adding: content/cache_training_t5/json/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bef49b953c77fdf0.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-105206b5fd478147.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-040b968aed3576f7.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-f37cf2f406b18541.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 58%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-b0aef076d30fe2f7.arrow (deflated 74%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 62%)\n",
" adding: content/cache_training_t5/json/default-25a5883a4a222bad/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/run_glue.py (deflated 73%)\n",
" adding: content/run_translation.py (deflated 74%)\n",
" adding: content/roberta_custom_training_cache/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/roberta_custom_training_cache_json_default-01aa9d8252a24a0d_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-e62b2012f3f40cb2.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-cd497527f5c67ba7.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-9c2deb15eb4326c1.arrow (deflated 88%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/roberta_custom_training_cache/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21 (deflated 63%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/ad0bcbeb288f0d1373d88e0762e66357f55b8311 (deflated 59%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957 (deflated 47%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/blobs/278b7a95739c4392fae9b818bb5343dde20be1b89318f37a6d939e1e1b9e461b (deflated 41%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/refs/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/refs/main (deflated 3%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/tokenizer_config.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/added_tokens.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/.no_exist/ff46155979338ff8063cdad90908b498ab91b181/special_tokens_map.json (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/ (stored 0%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/config.json (deflated 47%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/tokenizer.json (deflated 59%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/merges.txt (deflated 53%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/vocab.json (deflated 63%)\n",
" adding: content/roberta_custom_training_cache/models--roberta-base/snapshots/ff46155979338ff8063cdad90908b498ab91b181/pytorch_model.bin (deflated 41%)\n",
" adding: content/gtp_cache_training/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-7b339bb99d7c17a1.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 57%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-82acdaa33d6aa0eb.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 64%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-bb8faaac56c0b87e.arrow (deflated 88%)\n",
" adding: content/gtp_cache_training/json/default-01aa9d8252a24a0d/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/226b0752cac7789c48f0cb3ec53eda48b7be36cc (deflated 53%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/7c5d3f4b8b76583b422fcb9189ad6c89d5d97a094541ce8932dce3ecabde1421 (deflated 16%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/1f1d9aaca301414e7f6c9396df506798ff4eb9a6 (deflated 67%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/10c66461e4c109db5a2196bff4bb59be30396ed8 (deflated 50%)\n",
" adding: content/gtp_cache_training/models--gpt2/blobs/4b988bccc9dc5adacd403c00b4704976196548f8 (deflated 59%)\n",
" adding: content/gtp_cache_training/models--gpt2/refs/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/refs/main (deflated 3%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer_config.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/added_tokens.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/.no_exist/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/special_tokens_map.json (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/ (stored 0%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json (deflated 50%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json (deflated 59%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt (deflated 53%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json (deflated 67%)\n",
" adding: content/gtp_cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin (deflated 16%)\n",
" adding: content/gtp_cache_training/gtp_cache_training_json_default-01aa9d8252a24a0d_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/t5_cache_training/ (stored 0%)\n",
" adding: content/t5_cache_training/t5_cache_training_json_default-a82ca4164dba097e_0.0.0_0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.lock (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/4e28ff6ebdf584f5372d9de68867399142435d9a (deflated 48%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/b114c318caf72f6e89ea92e0755c41327a453198 (deflated 82%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/07b81619b82546ab7f30e06c9615c7fca8fe3abd (deflated 44%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/881bdbffc06e471924ecea57f962bc5f8e2a9f21 (deflated 83%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/blobs/7c9a3e998a8c74b52484f3a1ccfdcc9767972ee6b34ae7a527cdf6f972a34163 (deflated 53%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/refs/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/refs/main (deflated 5%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer.json (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/.no_exist/8a88af75516269158a3aa488d1abdfd3d5e4ee49/added_tokens.json (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/ (stored 0%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/config.json (deflated 44%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/tokenizer_config.json (deflated 82%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/spiece.model (deflated 48%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/special_tokens_map.json (deflated 83%)\n",
" adding: content/t5_cache_training/models--google--t5-v1_1-small/snapshots/8a88af75516269158a3aa488d1abdfd3d5e4ee49/pytorch_model.bin (deflated 53%)\n",
" adding: content/t5_cache_training/json/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51.incomplete_info.lock (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/ (stored 0%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-test.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-988bff0993eee389.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-train.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/dataset_info.json (deflated 58%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-fa17416eabe18767.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-c6cebbf9290f7df0.arrow (deflated 74%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/json-validation.arrow (deflated 62%)\n",
" adding: content/t5_cache_training/json/default-a82ca4164dba097e/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51_builder.lock (stored 0%)\n",
" adding: content/out/ (stored 0%)\n",
" adding: content/out/emotion/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/trainer_state.json (deflated 79%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/all_results.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/predict_results_None.txt (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/trainer_state.json (deflated 77%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/train_results.json (deflated 40%)\n",
" adding: content/out/emotion/gpt2_custom/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/eval_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/scheduler.pt (deflated 50%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-2500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/runs/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/1676409101.551365/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/1676409101.551365/events.out.tfevents.1676409101.fc0011e45a00.60473.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-11-35_fc0011e45a00/events.out.tfevents.1676409101.fc0011e45a00.60473.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/events.out.tfevents.1676407620.fc0011e45a00.53924.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/1676407620.269752/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-53_fc0011e45a00/1676407620.269752/events.out.tfevents.1676407620.fc0011e45a00.53924.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/events.out.tfevents.1676411802.fc0011e45a00.72811.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/events.out.tfevents.1676412248.fc0011e45a00.72811.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/1676411802.9557116/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-56-28_fc0011e45a00/1676411802.9557116/events.out.tfevents.1676411802.fc0011e45a00.72811.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/events.out.tfevents.1676409199.fc0011e45a00.60936.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/1676409199.1303008/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-13-12_fc0011e45a00/1676409199.1303008/events.out.tfevents.1676409199.fc0011e45a00.60936.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/1676408364.7675455/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/1676408364.7675455/events.out.tfevents.1676408364.fc0011e45a00.57251.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-59-18_fc0011e45a00/events.out.tfevents.1676408364.fc0011e45a00.57251.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/events.out.tfevents.1676409294.fc0011e45a00.61381.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/1676409294.483754/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-14-48_fc0011e45a00/1676409294.483754/events.out.tfevents.1676409294.fc0011e45a00.61381.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/events.out.tfevents.1676407574.fc0011e45a00.53675.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/1676407574.5370467/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-46-07_fc0011e45a00/1676407574.5370467/events.out.tfevents.1676407574.fc0011e45a00.53675.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/1676409363.3658211/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/1676409363.3658211/events.out.tfevents.1676409363.fc0011e45a00.61724.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-15-57_fc0011e45a00/events.out.tfevents.1676409363.fc0011e45a00.61724.0 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/events.out.tfevents.1676407449.fc0011e45a00.53094.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/1676407449.3215246/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-44-02_fc0011e45a00/1676407449.3215246/events.out.tfevents.1676407449.fc0011e45a00.53094.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/events.out.tfevents.1676408949.fc0011e45a00.59782.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/1676408949.6798263/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-09-03_fc0011e45a00/1676408949.6798263/events.out.tfevents.1676408949.fc0011e45a00.59782.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/events.out.tfevents.1676410915.fc0011e45a00.68705.0 (deflated 57%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/1676410915.0364006/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_21-41-48_fc0011e45a00/1676410915.0364006/events.out.tfevents.1676410915.fc0011e45a00.68705.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/events.out.tfevents.1676407741.fc0011e45a00.54546.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/1676407741.3566854/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-48-55_fc0011e45a00/1676407741.3566854/events.out.tfevents.1676407741.fc0011e45a00.54546.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/events.out.tfevents.1676407672.fc0011e45a00.54203.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/1676407672.9366086/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-47-46_fc0011e45a00/1676407672.9366086/events.out.tfevents.1676407672.fc0011e45a00.54203.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/events.out.tfevents.1676408205.fc0011e45a00.56536.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/1676408205.8404686/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-56-39_fc0011e45a00/1676408205.8404686/events.out.tfevents.1676408205.fc0011e45a00.56536.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/1676408153.0722597/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/1676408153.0722597/events.out.tfevents.1676408153.fc0011e45a00.56263.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2_custom/runs/Feb14_20-55-46_fc0011e45a00/events.out.tfevents.1676408153.fc0011e45a00.56263.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/trainer_state.json (deflated 75%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-1000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/README.md (deflated 54%)\n",
" adding: content/out/emotion/gpt2_custom/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/trainer_state.json (deflated 67%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/optimizer.pt (deflated 31%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2_custom/checkpoint-500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2_custom/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/trainer_state.json (deflated 80%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/all_results.json (deflated 55%)\n",
" adding: content/out/emotion/gpt2/predict_results_None.txt (deflated 62%)\n",
" adding: content/out/emotion/gpt2/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/trainer_state.json (deflated 81%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/trainer_state.json (deflated 78%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/train_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/eval_results.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/scheduler.pt (deflated 50%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/trainer_state.json (deflated 81%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-2500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/runs/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/events.out.tfevents.1676411778.fc0011e45a00.70872.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/1676411348.7268953/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/1676411348.7268953/events.out.tfevents.1676411348.fc0011e45a00.70872.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_21-48-55_fc0011e45a00/events.out.tfevents.1676411348.fc0011e45a00.70872.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/events.out.tfevents.1676407272.fc0011e45a00.50524.2 (deflated 28%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/events.out.tfevents.1676406850.fc0011e45a00.50524.0 (deflated 63%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/1676406850.2390406/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_20-34-05_fc0011e45a00/1676406850.2390406/events.out.tfevents.1676406850.fc0011e45a00.50524.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/events.out.tfevents.1676403875.fc0011e45a00.37469.0 (deflated 60%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/1676403875.9091897/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/runs/Feb14_19-44-33_fc0011e45a00/1676403875.9091897/events.out.tfevents.1676403875.fc0011e45a00.37469.1 (deflated 62%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/trainer_state.json (deflated 75%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/optimizer.pt (deflated 29%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-1000/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/README.md (deflated 54%)\n",
" adding: content/out/emotion/gpt2/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/ (stored 0%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/scheduler.pt (deflated 49%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/rng_state.pth (deflated 28%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/config.json (deflated 56%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/tokenizer_config.json (deflated 41%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/trainer_state.json (deflated 67%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/tokenizer.json (deflated 72%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/optimizer.pt (deflated 30%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/training_args.bin (deflated 48%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/special_tokens_map.json (deflated 60%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/merges.txt (deflated 53%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/vocab.json (deflated 59%)\n",
" adding: content/out/emotion/gpt2/checkpoint-500/pytorch_model.bin (deflated 9%)\n",
" adding: content/out/emotion/gpt2/pytorch_model.bin\n",
"\n",
"\n",
"zip error: Interrupted (aborting)\n"
]
}
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
}
},
"colab": {
"provenance": []
},
"accelerator": "GPU",
"gpuClass": "premium",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"18f03144f5194bd2a88064eaae1140f0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_36b8333766d44ee2aaa8da8ee75975d2",
"IPY_MODEL_f7a9b125cf1346468e428abd689ff800",
"IPY_MODEL_9b9e6161874f41c98d5c5e55d8d4fc86"
],
"layout": "IPY_MODEL_9925a6f17ba14eee96332f0ea1dc88e5"
}
},
"36b8333766d44ee2aaa8da8ee75975d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_dce64adfb8334591a8ce182918ecb4e3",
"placeholder": "",
"style": "IPY_MODEL_9efd8cd2208245aca3f369f0735e2ee1",
"value": "Downloading (…)okenizer_config.json: 100%"
}
},
"f7a9b125cf1346468e428abd689ff800": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_3d05704ffb0040c8b5bfb5c068c3329b",
"max": 2539,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_9564dcdd10c64072bb09e70def311ff3",
"value": 2539
}
},
"9b9e6161874f41c98d5c5e55d8d4fc86": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_f406c9b52a274068bd636554558497b2",
"placeholder": "",
"style": "IPY_MODEL_d97be50f8cc64f8680a6cce112863255",
"value": " 2.54k/2.54k [00:00&lt;00:00, 125kB/s]"
}
},
"9925a6f17ba14eee96332f0ea1dc88e5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"dce64adfb8334591a8ce182918ecb4e3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9efd8cd2208245aca3f369f0735e2ee1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"3d05704ffb0040c8b5bfb5c068c3329b": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9564dcdd10c64072bb09e70def311ff3": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"f406c9b52a274068bd636554558497b2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d97be50f8cc64f8680a6cce112863255": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"7d6b88e56dad4dcbb0f1b1720f1ff118": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_eabc78cbdeef40feb36cf90fdbcdfbc7",
"IPY_MODEL_6477d99dffbc4cf39e2c6998f71e37f7",
"IPY_MODEL_d63511a8852942309cabe53720939fcc"
],
"layout": "IPY_MODEL_3096b59f64eb48659a8eedea5a171be4"
}
},
"eabc78cbdeef40feb36cf90fdbcdfbc7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_acc58b06f3b54801b10ee872fab39e6e",
"placeholder": "",
"style": "IPY_MODEL_c2bd9c9ddab848529e52adfdc7634044",
"value": "Downloading (…)&quot;spiece.model&quot;;: 100%"
}
},
"6477d99dffbc4cf39e2c6998f71e37f7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_8d7e8c29d7e247f1b55d329d40508526",
"max": 791656,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_457b70adcab0464c9f990b13f433c635",
"value": 791656
}
},
"d63511a8852942309cabe53720939fcc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_0858fe327ec549b488f6169de1d84654",
"placeholder": "",
"style": "IPY_MODEL_e18a505153c7491f8900142fb1189cd7",
"value": " 792k/792k [00:00&lt;00:00, 8.08MB/s]"
}
},
"3096b59f64eb48659a8eedea5a171be4": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"acc58b06f3b54801b10ee872fab39e6e": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c2bd9c9ddab848529e52adfdc7634044": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"8d7e8c29d7e247f1b55d329d40508526": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"457b70adcab0464c9f990b13f433c635": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"0858fe327ec549b488f6169de1d84654": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"e18a505153c7491f8900142fb1189cd7": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"945026e5e11448b39ab37fb2a0bd963c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_8c3aa97d58cb4f21b59af6253c952859",
"IPY_MODEL_848ff807a83c4a79a1b3d7d80c29499c",
"IPY_MODEL_a7b1f6722fcd4e90811041b24df0fe7b"
],
"layout": "IPY_MODEL_f815d05091814c39a467cd8f528db504"
}
},
"8c3aa97d58cb4f21b59af6253c952859": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_915449ab41d848d39d801b4feb932a4f",
"placeholder": "",
"style": "IPY_MODEL_2937b015455647abb7a524f858a881d2",
"value": "Downloading (…)/main/tokenizer.json: 100%"
}
},
"848ff807a83c4a79a1b3d7d80c29499c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c2b6cda9a8e94f7e97d7fb032b8e2bc5",
"max": 2424064,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_af885a022ad743098e5037e1c8dc760a",
"value": 2424064
}
},
"a7b1f6722fcd4e90811041b24df0fe7b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_088ec36aff7f415abfc4fd926fa0f902",
"placeholder": "",
"style": "IPY_MODEL_b1b99d863dc64208afc11416d4936c2c",
"value": " 2.42M/2.42M [00:00&lt;00:00, 18.4MB/s]"
}
},
"f815d05091814c39a467cd8f528db504": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"915449ab41d848d39d801b4feb932a4f": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"2937b015455647abb7a524f858a881d2": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"c2b6cda9a8e94f7e97d7fb032b8e2bc5": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"af885a022ad743098e5037e1c8dc760a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"088ec36aff7f415abfc4fd926fa0f902": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b1b99d863dc64208afc11416d4936c2c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cb9e02be7ec44f6bb6b8771691c114e4": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_f68a247bddf9484e9f7b1666802f4612",
"IPY_MODEL_d8d89ac972084304bff515a16e009452",
"IPY_MODEL_3495b00846ae49acbb0cf3e15edf361e"
],
"layout": "IPY_MODEL_60f6f23e78ce4ee2abf7389ab936c3ac"
}
},
"f68a247bddf9484e9f7b1666802f4612": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_9d428e02c4134510baf179ce9137d90c",
"placeholder": "",
"style": "IPY_MODEL_5298f4cd4e2e404ea66d70c62bcfe439",
"value": "Downloading (…)cial_tokens_map.json: 100%"
}
},
"d8d89ac972084304bff515a16e009452": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_cd9fdc3eb94a4d00b5af6115318dcf45",
"max": 2201,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d664c674a977456cad109347c0206d0e",
"value": 2201
}
},
"3495b00846ae49acbb0cf3e15edf361e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_17e5dedc0aeb4a1da32113e51158fd74",
"placeholder": "",
"style": "IPY_MODEL_9b70ec9f110f4080a6a26fd12044fe94",
"value": " 2.20k/2.20k [00:00&lt;00:00, 160kB/s]"
}
},
"60f6f23e78ce4ee2abf7389ab936c3ac": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9d428e02c4134510baf179ce9137d90c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"5298f4cd4e2e404ea66d70c62bcfe439": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"cd9fdc3eb94a4d00b5af6115318dcf45": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d664c674a977456cad109347c0206d0e": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"17e5dedc0aeb4a1da32113e51158fd74": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9b70ec9f110f4080a6a26fd12044fe94": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 0
}