projekt-glebokie/GPT_2.ipynb
2023-02-12 23:36:28 +01:00


{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"16f1b324020d48c3a8fd4487c42bbd6b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_df036cb95c56454998cb7c788d341584",
"IPY_MODEL_4a0387dee622459498ddc9d7bf201187",
"IPY_MODEL_604840d710474c71a676c8368c9b3f2f"
],
"layout": "IPY_MODEL_7e3d18de5d554030bd6aa801ac7f3192"
}
},
"df036cb95c56454998cb7c788d341584": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_419e5a2aab8147d79490748f633675cd",
"placeholder": "",
"style": "IPY_MODEL_41452374c2a64afd82d30744b36dd801",
"value": "100%"
}
},
"4a0387dee622459498ddc9d7bf201187": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ea94ef66672d4b69bf0d5eac6f7dada3",
"max": 3,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_a7e7dd3a259a4a878cfcd6a66ed35c7c",
"value": 3
}
},
"604840d710474c71a676c8368c9b3f2f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_275a9313a7e14c57b66cbe484499c8ec",
"placeholder": "",
"style": "IPY_MODEL_c01651c9c010429bbf5507770ce6b6ce",
"value": " 3/3 [00:00<00:00, 136.35it/s]"
}
},
"7e3d18de5d554030bd6aa801ac7f3192": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"419e5a2aab8147d79490748f633675cd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"41452374c2a64afd82d30744b36dd801": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ea94ef66672d4b69bf0d5eac6f7dada3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a7e7dd3a259a4a878cfcd6a66ed35c7c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"275a9313a7e14c57b66cbe484499c8ec": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c01651c9c010429bbf5507770ce6b6ce": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "JErLYXsaYy8-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"! pip install datasets transformers torch scikit-learn evaluate"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u29i-U30zRjY",
"outputId": "55534ca2-097f-4e7a-a517-463f974148cf"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting datasets\n",
" Downloading datasets-2.9.0-py3-none-any.whl (462 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m462.8/462.8 KB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting transformers\n",
" Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (1.13.1+cu116)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.0.2)\n",
"Collecting evaluate\n",
" Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 KB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from datasets) (1.21.6)\n",
"Collecting xxhash\n",
" Downloading xxhash-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (213 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.0/213.0 KB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (2023.1.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
"Collecting responses<0.19\n",
" Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (6.0)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
"Collecting huggingface-hub<1.0.0,>=0.2.0\n",
" Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m190.3/190.3 KB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: dill<0.3.7 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.3.6)\n",
"Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (4.64.1)\n",
"Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (2.25.1)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from datasets) (1.3.5)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from datasets) (23.0)\n",
"Collecting multiprocess\n",
" Downloading multiprocess-0.70.14-py38-none-any.whl (132 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.0/132.0 KB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n",
"Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n",
" Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m51.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch) (4.4.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n",
"Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (4.0.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (2022.12.7)\n",
"Collecting urllib3<1.27,>=1.21.1\n",
" Downloading urllib3-1.26.14-py2.py3-none-any.whl (140 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m140.6/140.6 KB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2022.7.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n",
"Installing collected packages: tokenizers, xxhash, urllib3, multiprocess, responses, huggingface-hub, transformers, datasets, evaluate\n",
" Attempting uninstall: urllib3\n",
" Found existing installation: urllib3 1.24.3\n",
" Uninstalling urllib3-1.24.3:\n",
" Successfully uninstalled urllib3-1.24.3\n",
"Successfully installed datasets-2.9.0 evaluate-0.4.0 huggingface-hub-0.12.0 multiprocess-0.70.14 responses-0.18.0 tokenizers-0.13.2 transformers-4.26.1 urllib3-1.26.14 xxhash-3.2.0\n"
]
}
]
},
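{
"cell_type": "markdown",
"source": [
"Optional reproducibility note (a sketch, not executed in this run): the install above is unpinned, so the exact versions it resolved to, taken from its log, can be pinned to rebuild the same environment later."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Pin the versions that the unpinned install above resolved to (see its log)\n",
"! pip install datasets==2.9.0 transformers==4.26.1 evaluate==0.4.0"
],
"metadata": {},
"execution_count": null,
"outputs": []
},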
{
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "a_f-yno_zity"
}
},
{
"cell_type": "code",
"source": [
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/run_glue.py' -O 'run_glue.py'\n",
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/roberta.py' -O 'roberta.py'\n",
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/gpt2.py' -O 'gpt2.py'"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "V_HmRNcmzhsw",
"outputId": "feafb930-4dbf-436c-8e37-de4e8b8a32cc"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-02-12 21:57:57-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/run_glue.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 30601 (30K) [text/plain]\n",
"Saving to: run_glue.py\n",
"\n",
"run_glue.py 100%[===================>] 29.88K --.-KB/s in 0.1s \n",
"\n",
"2023-02-12 21:57:58 (248 KB/s) - run_glue.py saved [30601/30601]\n",
"\n",
"--2023-02-12 21:57:58-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/roberta.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 12783 (12K) [text/plain]\n",
"Saving to: roberta.py\n",
"\n",
"roberta.py 100%[===================>] 12.48K --.-KB/s in 0s \n",
"\n",
"2023-02-12 21:57:58 (265 MB/s) - roberta.py saved [12783/12783]\n",
"\n",
"--2023-02-12 21:57:58-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/gpt2.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 7976 (7.8K) [text/plain]\n",
"Saving to: gpt2.py\n",
"\n",
"gpt2.py 100%[===================>] 7.79K --.-KB/s in 0s \n",
"\n",
"2023-02-12 21:57:59 (1.37 GB/s) - gpt2.py saved [7976/7976]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"from pathlib import Path\n",
"from typing import Dict, List\n",
"from datasets import load_dataset\n",
"\n",
"loaded_data = load_dataset('emotion')\n",
"!mkdir -v -p data\n",
"\n",
"train_path = Path('data/train.json')\n",
"valid_path = Path('data/valid.json')\n",
"test_path = Path('data/test.json')\n",
"data_train, data_valid, data_test = [], [], []\n",
"\n",
"for source_data, dataset, max_size in [\n",
" (loaded_data['train'], data_train, None),\n",
" (loaded_data['validation'], data_valid, None),\n",
" (loaded_data['test'], data_test, None),\n",
"]:\n",
" for i, data in enumerate(source_data):\n",
" if max_size is not None and i >= max_size:\n",
" break\n",
" data_line = {\n",
" 'label': int(data['label']),\n",
" 'text': data['text'],\n",
" }\n",
" dataset.append(data_line)\n",
"\n",
"print(f'Train: {len(data_train):6d}')\n",
"print(f'Valid: {len(data_valid):6d}')\n",
"\n",
"data_class_1, data_class_2 = [], []\n",
"\n",
"\"\"\"for data in data_valid:\n",
" label = data['label']\n",
" if label == 0:\n",
" data_class_1.append(data)\n",
" elif label == 1:\n",
" data_class_2.append(data)\n",
"\n",
"print(f'Label 1: {len(data_class_1):6d}')\n",
"print(f'Label 2: {len(data_class_2):6d}')\n",
"\n",
"size_half_class_1 = int(len(data_class_1) / 2)\n",
"size_half_class_2 = int(len(data_class_2) / 2)\n",
"\n",
"data_valid = data_class_1[:size_half_class_1] + data_class_2[:size_half_class_2]\n",
"data_test = data_class_1[size_half_class_1:] + data_class_2[size_half_class_2:]\n",
"\"\"\"\n",
"\n",
"print(f'Valid: {len(data_valid):6d}')\n",
"print(f'Test : {len(data_test):6d}')\n",
"\n",
"MAP_LABEL_TRANSLATION = {\n",
" 0: 'sadness',\n",
" 1: 'joy',\n",
" 2: 'love',\n",
" 3: 'anger',\n",
" 4: 'fear',\n",
" 5: 'surprise',\n",
"}\n",
"\n",
"def save_as_translations(original_save_path: Path, data_to_save: List[Dict]) -> None:\n",
" file_name = 's2s-' + original_save_path.name\n",
" file_path = original_save_path.parent / file_name\n",
"\n",
" print(f'Saving into: {file_path}')\n",
" with open(file_path, 'wt') as f_write:\n",
" for data_line in data_to_save:\n",
" label = data_line['label']\n",
" new_label = MAP_LABEL_TRANSLATION[label]\n",
" data_line['label'] = new_label\n",
" data_line_str = json.dumps(data_line)\n",
" f_write.write(f'{data_line_str}\\n')\n",
"\n",
"for file_path, data_to_save in [(train_path, data_train), (valid_path, data_valid), (test_path, data_test)]:\n",
" print(f'Saving into: {file_path}')\n",
" with open(file_path, 'wt') as f_write:\n",
" for data_line in data_to_save:\n",
" data_line_str = json.dumps(data_line)\n",
" f_write.write(f'{data_line_str}\\n')\n",
" \n",
" save_as_translations(file_path, data_to_save)\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295,
"referenced_widgets": [
"16f1b324020d48c3a8fd4487c42bbd6b",
"df036cb95c56454998cb7c788d341584",
"4a0387dee622459498ddc9d7bf201187",
"604840d710474c71a676c8368c9b3f2f",
"7e3d18de5d554030bd6aa801ac7f3192",
"419e5a2aab8147d79490748f633675cd",
"41452374c2a64afd82d30744b36dd801",
"ea94ef66672d4b69bf0d5eac6f7dada3",
"a7e7dd3a259a4a878cfcd6a66ed35c7c",
"275a9313a7e14c57b66cbe484499c8ec",
"c01651c9c010429bbf5507770ce6b6ce"
]
},
"id": "bcR4tWQl0rqt",
"outputId": "6a2bad78-8eb7-4a90-c839-b7cc470438d7"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:datasets.builder:No config specified, defaulting to: emotion/split\n",
"WARNING:datasets.builder:Found cached dataset emotion (/root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/3 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "16f1b324020d48c3a8fd4487c42bbd6b"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"mkdir: created directory 'data'\n",
"Train: 16000\n",
"Valid: 2000\n",
"Valid: 2000\n",
"Test : 2000\n",
"Saving into: data/train.json\n",
"Saving into: data/s2s-train.json\n",
"Saving into: data/valid.json\n",
"Saving into: data/s2s-valid.json\n",
"Saving into: data/test.json\n",
"Saving into: data/s2s-test.json\n"
]
}
]
},
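{
"cell_type": "markdown",
"source": [
"Quick sanity check (a sketch, not executed in this run): each line of `data/train.json` holds an integer label, while the `s2s-` variant of the same file stores the label as its word form (e.g. `sadness`), as produced by `save_as_translations` above."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Compare one line of the numeric-label file with its word-label counterpart\n",
"!head -n 1 data/train.json data/s2s-train.json"
],
"metadata": {},
"execution_count": null,
"outputs": []
},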
{
"cell_type": "code",
"source": [
"\n",
"!head -n 4500 data/train.json > data/train-5k.json\n",
"!tail -n 2500 data/train.json >> data/train-5k.json\n",
"!wc -l data/train-5k.json"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pRmHIvyB0fZe",
"outputId": "a6f163f0-a393-431c-92e9-aaaf04601832"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"7000 data/train-5k.json\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from pathlib import Path\n",
"\n",
"for file_name in [\"train\", \"valid\", \"test\", \"s2s-train\", \"s2s-valid\", \"s2s-test\"]:\n",
" print(f\"=== {file_name} ===\")\n",
" all_text = Path(f\"data/{file_name}.json\").read_text().split('\\n')\n",
" text = all_text[:2500] + all_text[-2500:]\n",
" Path(f\"data/{file_name}-5k.json\").write_text(\"\\n\".join(text))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rFa6ijdx2L28",
"outputId": "3303cd99-beba-4685-d8a0-80f819b1b50d"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"=== train ===\n",
"=== valid ===\n",
"=== test ===\n",
"=== s2s-train ===\n",
"=== s2s-valid ===\n",
"=== s2s-test ===\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"\n",
"os.environ['TOKENIZERS_PARALLELISM'] = 'true'"
],
"metadata": {
"id": "8opbDvBv3ZlK"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "pxuxjHt8P57X"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!python run_glue.py \\\n",
"--cache_dir .cache_training \\\n",
"--model_name_or_path gpt2 \\\n",
"--custom_model gpt2_hidden \\\n",
"--freeze_weights \\\n",
"--train_file data/s2s-train.json \\\n",
"--validation_file data/s2s-valid.json \\\n",
"--test_file data/s2s-test.json \\\n",
"--per_device_train_batch_size 24 \\\n",
"--per_device_eval_batch_size 24 \\\n",
"--do_train \\\n",
"--do_eval \\\n",
"--do_predict \\\n",
"--max_seq_length 128 \\\n",
"--learning_rate 2e-5 \\\n",
"--num_train_epochs 5 \\\n",
"--output_dir out/imdb-5k/gpt2"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XkkeRPG_z3Jc",
"outputId": "ffdacc37-5c06-401a-a588-a1d272dd72b0"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-12 22:00:15.880386: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-12 22:00:16.771169: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-12 22:00:16.771276: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-12 22:00:16.771294: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=None,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/imdb-5k/gpt2/runs/Feb12_22-00-19_506c7abe63fb,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
"metric_for_best_model=None,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=5.0,\n",
"optim=adamw_hf,\n",
"optim_args=None,\n",
"output_dir=out/imdb-5k/gpt2,\n",
"overwrite_output_dir=False,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/imdb-5k/gpt2,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
"torch_compile_backend=None,\n",
"torch_compile_mode=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/s2s-train.json\n",
"INFO:__main__:load a local file for validation: data/s2s-valid.json\n",
"INFO:__main__:load a local file for test: data/s2s-test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-623c8a7b15a2e58a\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 14873.42it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 1763.55it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 1028.86it/s]\n",
"Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 126kB/s]\n",
"[INFO|configuration_utils.py:660] 2023-02-12 22:00:20,342 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 22:00:20,343 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:458] 2023-02-12 22:00:20,434 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:660] 2023-02-12 22:00:20,525 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 22:00:20,525 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.19MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.93MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.9MB/s]\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,743 >> loading file vocab.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,743 >> loading file merges.txt from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,743 >> loading file tokenizer.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,744 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,744 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,744 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:660] 2023-02-12 22:00:21,744 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 22:00:21,745 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: True\n",
"INFO:__main__:Using implementation from class: GPT2ForSequenceClassificationCustom\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 103MB/s]\n",
"[INFO|modeling_utils.py:2275] 2023-02-12 22:00:27,304 >> loading weights file pytorch_model.bin from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2857] 2023-02-12 22:00:30,150 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
"\n",
"[WARNING|modeling_utils.py:2859] 2023-02-12 22:00:30,150 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_hidden.weight', 'score.dense_2.weight', 'score.out_proj.weight', 'score.dense_2.bias', 'score.dense_1_hidden.bias', 'score.dense_1_input.bias', 'score.dense_1_input.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"INFO:__main__:Freezing encoder weights\n",
"INFO:__main__:Freezing layer 1\n",
"INFO:__main__:Freezing layer 2\n",
"INFO:__main__:Freezing layer 3\n",
"INFO:__main__:Freezing layer 4\n",
"INFO:__main__:Freezing layer 5\n",
"INFO:__main__:Freezing layer 6\n",
"INFO:__main__:Freezing layer 7\n",
"INFO:__main__:Freezing layer 8\n",
"INFO:__main__:Freezing layer 9\n",
"INFO:__main__:Freezing layer 10\n",
"INFO:__main__:Freezing layer 11\n",
"INFO:__main__:Freezing layer 12\n",
"INFO:__main__:Freezing layer 13\n",
"INFO:__main__:Freezing layer 14\n",
"INFO:__main__:Freezing layer 15\n",
"INFO:__main__:Freezing layer 16\n",
"INFO:__main__:Freezing layer 17\n",
"INFO:__main__:Freezing layer 18\n",
"INFO:__main__:Freezing layer 19\n",
"INFO:__main__:Freezing layer 20\n",
"INFO:__main__:Freezing layer 21\n",
"INFO:__main__:Freezing layer 22\n",
"INFO:__main__:Freezing layer 23\n",
"INFO:__main__:Freezing layer 24\n",
"INFO:__main__:Freezing layer 25\n",
"INFO:__main__:Freezing layer 26\n",
"INFO:__main__:Freezing layer 27\n",
"INFO:__main__:Freezing layer 28\n",
"INFO:__main__:Freezing layer 29\n",
"INFO:__main__:Freezing layer 30\n",
"INFO:__main__:Freezing layer 31\n",
"INFO:__main__:Freezing layer 32\n",
"INFO:__main__:Freezing layer 33\n",
"INFO:__main__:Freezing layer 34\n",
"INFO:__main__:Freezing layer 35\n",
"INFO:__main__:Freezing layer 36\n",
"INFO:__main__:Freezing layer 37\n",
"INFO:__main__:Freezing layer 38\n",
"INFO:__main__:Freezing layer 39\n",
"INFO:__main__:Freezing layer 40\n",
"INFO:__main__:Ignoring layer 41\n",
"INFO:__main__:Ignoring layer 42\n",
"INFO:__main__:Ignoring layer 43\n",
"INFO:__main__:Ignoring layer 44\n",
"INFO:__main__:Ignoring layer 45\n",
"INFO:__main__:Ignoring layer 46\n",
"INFO:__main__:Ignoring layer 47\n",
"INFO:__main__:Ignoring layer 48\n",
"INFO:__main__:Ignoring layer 49\n",
"INFO:__main__:Ignoring layer 50\n",
"INFO:__main__:Ignoring layer 51\n",
"INFO:__main__:Ignoring layer 52\n",
"INFO:__main__:Ignoring layer 53\n",
"INFO:__main__:Ignoring layer 54\n",
"INFO:__main__:Ignoring layer 55\n",
"INFO:__main__:Ignoring layer 56\n",
"INFO:__main__:Ignoring layer 57\n",
"INFO:__main__:Ignoring layer 58\n",
"INFO:__main__:Ignoring layer 59\n",
"INFO:__main__:Ignoring layer 60\n",
"INFO:__main__:Ignoring layer 61\n",
"INFO:__main__:Ignoring layer 62\n",
"INFO:__main__:Ignoring layer 63\n",
"INFO:__main__:Ignoring layer 64\n",
"INFO:__main__:Ignoring layer 65\n",
"INFO:__main__:Ignoring layer 66\n",
"INFO:__main__:Ignoring layer 67\n",
"INFO:__main__:Ignoring layer 68\n",
"INFO:__main__:Ignoring layer 69\n",
"INFO:__main__:Ignoring layer 70\n",
"INFO:__main__:Ignoring layer 71\n",
"INFO:__main__:Ignoring layer 72\n",
"INFO:__main__:Ignoring layer 73\n",
"INFO:__main__:Ignoring layer 74\n",
"INFO:__main__:Ignoring layer 75\n",
"INFO:__main__:Ignoring layer 76\n",
"INFO:__main__:Ignoring layer 77\n",
"INFO:__main__:Ignoring layer 78\n",
"INFO:__main__:Ignoring layer 79\n",
"INFO:__main__:Ignoring layer 80\n",
"INFO:__main__:Ignoring layer 81\n",
"INFO:__main__:Ignoring layer 82\n",
"INFO:__main__:Ignoring layer 83\n",
"INFO:__main__:Ignoring layer 84\n",
"INFO:__main__:Ignoring layer 85\n",
"INFO:__main__:Ignoring layer 86\n",
"INFO:__main__:Ignoring layer 87\n",
"INFO:__main__:Ignoring layer 88\n",
"INFO:__main__:Ignoring layer 89\n",
"INFO:__main__:Ignoring layer 90\n",
"INFO:__main__:Ignoring layer 91\n",
"INFO:__main__:Ignoring layer 92\n",
"INFO:__main__:Ignoring layer 93\n",
"INFO:__main__:Ignoring layer 94\n",
"INFO:__main__:Ignoring layer 95\n",
"INFO:__main__:Ignoring layer 96\n",
"INFO:__main__:Ignoring layer 97\n",
"INFO:__main__:Ignoring layer 98\n",
"INFO:__main__:Ignoring layer 99\n",
"INFO:__main__:Ignoring layer 100\n",
"INFO:__main__:Ignoring layer 101\n",
"INFO:__main__:Ignoring layer 102\n",
"INFO:__main__:Ignoring layer 103\n",
"INFO:__main__:Ignoring layer 104\n",
"INFO:__main__:Ignoring layer 105\n",
"INFO:__main__:Ignoring layer 106\n",
"INFO:__main__:Ignoring layer 107\n",
"INFO:__main__:Ignoring layer 108\n",
"INFO:__main__:Ignoring layer 109\n",
"INFO:__main__:Ignoring layer 110\n",
"INFO:__main__:Ignoring layer 111\n",
"INFO:__main__:Ignoring layer 112\n",
"INFO:__main__:Ignoring layer 113\n",
"INFO:__main__:Ignoring layer 114\n",
"INFO:__main__:Ignoring layer 115\n",
"INFO:__main__:Ignoring layer 116\n",
"INFO:__main__:Ignoring layer 117\n",
"INFO:__main__:Ignoring layer 118\n",
"INFO:__main__:Ignoring layer 119\n",
"INFO:__main__:Ignoring layer 120\n",
"INFO:__main__:Ignoring layer 121\n",
"INFO:__main__:Ignoring layer 122\n",
"INFO:__main__:Ignoring layer 123\n",
"INFO:__main__:Ignoring layer 124\n",
"INFO:__main__:Ignoring layer 125\n",
"INFO:__main__:Ignoring layer 126\n",
"INFO:__main__:Ignoring layer 127\n",
"INFO:__main__:Ignoring layer 128\n",
"INFO:__main__:Ignoring layer 129\n",
"INFO:__main__:Ignoring layer 130\n",
"INFO:__main__:Ignoring layer 131\n",
"INFO:__main__:Ignoring layer 132\n",
"INFO:__main__:Ignoring layer 133\n",
"INFO:__main__:Ignoring layer 134\n",
"INFO:__main__:Ignoring layer 135\n",
"INFO:__main__:Ignoring layer 136\n",
"INFO:__main__:Ignoring layer 137\n",
"INFO:__main__:Ignoring layer 138\n",
"INFO:__main__:Ignoring layer 139\n",
"INFO:__main__:Ignoring layer 140\n",
"INFO:__main__:Ignoring layer 141\n",
"INFO:__main__:Ignoring layer 142\n",
"INFO:__main__:Ignoring layer 143\n",
"INFO:__main__:Ignoring layer 144\n",
"INFO:__main__:Ignoring layer 145\n",
"INFO:__main__:Ignoring layer 146\n",
"INFO:__main__:Ignoring layer 147\n",
"INFO:__main__:Ignoring layer 148\n",
"INFO:__main__:Ignoring layer 149\n",
"INFO:__main__:Ignoring layer 150\n",
"INFO:__main__:Ignoring layer 151\n",
"INFO:__main__:Ignoring layer 152\n",
"INFO:__main__:Ignoring layer 153\n",
"INFO:__main__:Ignoring layer 154\n",
"INFO:__main__:Ignoring layer 155\n",
"[ERROR|tokenization_utils_base.py:1042] 2023-02-12 22:00:30,162 >> Using pad_token, but it is not set yet.\n",
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-c8be49ce7f75c3ae.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:01<00:00, 11.14ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-01058c80c2124fd4.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 6.04ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-53937ce94683c2e2.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 11.40ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 4, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 2, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 3, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"Downloading builder script: 100% 4.20k/4.20k [00:00<00:00, 4.82MB/s]\n",
"[INFO|trainer.py:710] 2023-02-12 22:00:38,984 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1650] 2023-02-12 22:00:38,993 >> ***** Running training *****\n",
"[INFO|trainer.py:1651] 2023-02-12 22:00:38,993 >> Num examples = 16000\n",
"[INFO|trainer.py:1652] 2023-02-12 22:00:38,993 >> Num Epochs = 5\n",
"[INFO|trainer.py:1653] 2023-02-12 22:00:38,993 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1654] 2023-02-12 22:00:38,993 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1655] 2023-02-12 22:00:38,993 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1656] 2023-02-12 22:00:38,993 >> Total optimization steps = 3335\n",
"[INFO|trainer.py:1657] 2023-02-12 22:00:38,994 >> Number of trainable parameters = 68517888\n",
"{'loss': 1.0593, 'learning_rate': 1.7001499250374815e-05, 'epoch': 0.75}\n",
" 15% 500/3335 [04:16<24:04, 1.96it/s][INFO|trainer.py:2709] 2023-02-12 22:04:55,709 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-500\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:04:55,710 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:04:57,444 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:04:57,444 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:04:57,444 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.3829, 'learning_rate': 1.4002998500749626e-05, 'epoch': 1.5}\n",
" 30% 1000/3335 [08:36<19:51, 1.96it/s][INFO|trainer.py:2709] 2023-02-12 22:09:15,813 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-1000\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:09:15,814 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:09:17,628 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:09:17,629 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:09:17,630 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.256, 'learning_rate': 1.100449775112444e-05, 'epoch': 2.25}\n",
" 45% 1500/3335 [12:56<15:43, 1.95it/s][INFO|trainer.py:2709] 2023-02-12 22:13:36,008 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-1500\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:13:36,009 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:13:37,703 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:13:37,704 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:13:37,704 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.2101, 'learning_rate': 8.005997001499251e-06, 'epoch': 3.0}\n",
" 60% 2000/3335 [17:17<11:23, 1.95it/s][INFO|trainer.py:2709] 2023-02-12 22:17:56,308 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-2000\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:17:56,309 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:17:58,005 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:17:58,006 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:17:58,006 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-2000/special_tokens_map.json\n",
"{'loss': 0.17, 'learning_rate': 5.0074962518740634e-06, 'epoch': 3.75}\n",
" 75% 2500/3335 [21:37<07:05, 1.96it/s][INFO|trainer.py:2709] 2023-02-12 22:22:16,686 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-2500\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:22:16,687 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:22:18,356 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:22:18,357 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:22:18,357 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-2500/special_tokens_map.json\n",
"{'loss': 0.1569, 'learning_rate': 2.008995502248876e-06, 'epoch': 4.5}\n",
" 90% 3000/3335 [25:57<02:51, 1.95it/s][INFO|trainer.py:2709] 2023-02-12 22:26:36,938 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-3000\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:26:36,939 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-3000/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:26:38,608 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-3000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:26:38,608 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-3000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:26:38,608 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-3000/special_tokens_map.json\n",
"100% 3335/3335 [28:53<00:00, 2.15it/s][INFO|trainer.py:1901] 2023-02-12 22:29:32,259 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'train_runtime': 1733.281, 'train_samples_per_second': 46.155, 'train_steps_per_second': 1.924, 'train_loss': 0.35007504373118614, 'epoch': 5.0}\n",
"100% 3335/3335 [28:53<00:00, 1.92it/s]\n",
"[INFO|trainer.py:2709] 2023-02-12 22:29:32,277 >> Saving model checkpoint to out/imdb-5k/gpt2\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:29:32,278 >> Configuration saved in out/imdb-5k/gpt2/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:29:33,934 >> Model weights saved in out/imdb-5k/gpt2/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:29:33,934 >> tokenizer config file saved in out/imdb-5k/gpt2/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:29:33,934 >> Special tokens file saved in out/imdb-5k/gpt2/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 5.0\n",
" train_loss = 0.3501\n",
" train_runtime = 0:28:53.28\n",
" train_samples = 16000\n",
" train_samples_per_second = 46.155\n",
" train_steps_per_second = 1.924\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:710] 2023-02-12 22:29:34,047 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2964] 2023-02-12 22:29:34,108 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2966] 2023-02-12 22:29:34,108 >> Num examples = 2000\n",
"[INFO|trainer.py:2969] 2023-02-12 22:29:34,108 >> Batch size = 24\n",
"100% 84/84 [00:17<00:00, 4.83it/s]\n",
"***** eval metrics *****\n",
" epoch = 5.0\n",
" eval_accuracy = 0.93\n",
" eval_loss = 0.1531\n",
" eval_runtime = 0:00:17.72\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 112.855\n",
" eval_steps_per_second = 4.74\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:710] 2023-02-12 22:29:51,834 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2964] 2023-02-12 22:29:51,836 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2966] 2023-02-12 22:29:51,836 >> Num examples = 2000\n",
"[INFO|trainer.py:2969] 2023-02-12 22:29:51,836 >> Batch size = 24\n",
"100% 84/84 [00:17<00:00, 4.81it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:449] 2023-02-12 22:30:09,657 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9300000071525574}]}\n"
]
}
]
}
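,
{
"cell_type": "markdown",
"source": [
"A minimal inference sketch (not executed in this notebook). It assumes that `gpt2.py` defines the `GPT2ForSequenceClassificationCustom` class reported in the training log and that the class supports the standard `from_pretrained` API; the checkpoint path is the `output_dir` used above."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from transformers import AutoTokenizer\n",
"# Assumption: the custom classification model class is defined in the downloaded gpt2.py\n",
"from gpt2 import GPT2ForSequenceClassificationCustom\n",
"\n",
"checkpoint = 'out/imdb-5k/gpt2'  # output_dir of the training run above\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
"model = GPT2ForSequenceClassificationCustom.from_pretrained(checkpoint)\n",
"model.eval()\n",
"\n",
"inputs = tokenizer('i feel hopeful and powerful', return_tensors='pt')\n",
"with torch.no_grad():\n",
"    logits = model(**inputs).logits\n",
"# id2label comes from the saved config (falls back to LABEL_0..LABEL_5 if the\n",
"# training script did not rename the labels)\n",
"print(model.config.id2label[logits.argmax(dim=-1).item()])"
],
"metadata": {},
"execution_count": null,
"outputs": []
}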
]
}