projekt_glebokie/main.ipynb
wangobango 5c4e2db8ab done
2022-02-19 17:34:58 +01:00

8041 lines
602 KiB
Plaintext
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"source": [
"data_amount = 5000"
],
"metadata": {
"id": "oTWhQK1Aw-J7"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aCSHYmqxnJmd",
"outputId": "5e66d5f2-7e6e-4495-ca88-ccbd0c64ac33"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting transformers\n",
" Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)\n",
"\u001b[K |████████████████████████████████| 3.5 MB 4.3 MB/s \n",
"\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n",
"Collecting tokenizers!=0.11.3,>=0.10.1\n",
" Downloading tokenizers-0.11.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)\n",
"\u001b[K |████████████████████████████████| 6.8 MB 64.5 MB/s \n",
"\u001b[?25hCollecting sacremoses\n",
" Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)\n",
"\u001b[K |████████████████████████████████| 895 kB 58.2 MB/s \n",
"\u001b[?25hCollecting huggingface-hub<1.0,>=0.1.0\n",
" Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)\n",
"\u001b[K |████████████████████████████████| 67 kB 7.8 MB/s \n",
"\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
"Collecting pyyaml>=5.1\n",
" Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
"\u001b[K |████████████████████████████████| 596 kB 60.4 MB/s \n",
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
"Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers\n",
" Attempting uninstall: pyyaml\n",
" Found existing installation: PyYAML 3.13\n",
" Uninstalling PyYAML-3.13:\n",
" Successfully uninstalled PyYAML-3.13\n",
"Successfully installed huggingface-hub-0.4.0 pyyaml-6.0 sacremoses-0.0.47 tokenizers-0.11.5 transformers-4.16.2\n"
]
}
],
"source": [
"# Use %pip (targets this kernel's environment, unlike !pip) and pin the\n",
"# version for reproducibility — 4.16.2 is what this notebook was run with\n",
"# (see the recorded install output of this cell).\n",
"%pip install -q transformers==4.16.2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Jk99LQjzmzvw"
},
"outputs": [],
"source": [
"# Data handling and plotting\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Modelling: splits/metrics, torch backend, HuggingFace transformers\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n",
"import torch\n",
"from transformers import TrainingArguments, Trainer\n",
"from transformers import BertTokenizer, BertForSequenceClassification\n",
"from transformers import EarlyStoppingCallback"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wb-zBvXlaTAO",
"outputId": "62a128ce-8b64-404a-a462-b9c92e350246"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "jFTxtzzKmzv1",
"outputId": "afc4b505-8f43-484b-dbad-20f9e97fe37d"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
"\n",
"\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
"Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 31753: field larger than field limit (131072)\n",
"Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 275624: field larger than field limit (131072)\n",
"Skipping line 302668: field larger than field limit (131072)\n",
"Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 384761: field larger than field limit (131072)\n",
"Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 401260: field larger than field limit (131072)\n",
"Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 465419: field larger than field limit (131072)\n",
"Skipping line 466152: field larger than field limit (131072)\n",
"Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 529874: field larger than field limit (131072)\n",
"Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 589855: field larger than field limit (131072)\n",
"Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 632882: field larger than field limit (131072)\n",
"Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 658667: field larger than field limit (131072)\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-95a8d503-58ea-4f94-ba7f-46ac042b6219\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>topic</th>\n",
" <th>sign</th>\n",
" <th>date</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>14,May,2004</td>\n",
" <td>Info has been found (+/- 100 pages,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>13,May,2004</td>\n",
" <td>These are the team members: Drewe...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>12,May,2004</td>\n",
" <td>In het kader van kernfusie op aarde...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>12,May,2004</td>\n",
" <td>testing!!! testing!!!</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3581210</td>\n",
" <td>male</td>\n",
" <td>33</td>\n",
" <td>InvestmentBanking</td>\n",
" <td>Aquarius</td>\n",
" <td>11,June,2004</td>\n",
" <td>Thanks to Yahoo!'s Toolbar I can ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-95a8d503-58ea-4f94-ba7f-46ac042b6219')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-95a8d503-58ea-4f94-ba7f-46ac042b6219 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-95a8d503-58ea-4f94-ba7f-46ac042b6219');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" id ... text\n",
"0 2059027 ... Info has been found (+/- 100 pages,...\n",
"1 2059027 ... These are the team members: Drewe...\n",
"2 2059027 ... In het kader van kernfusie op aarde...\n",
"3 2059027 ... testing!!! testing!!! \n",
"4 3581210 ... Thanks to Yahoo!'s Toolbar I can ...\n",
"\n",
"[5 rows x 7 columns]"
]
},
"metadata": {},
"execution_count": 100
}
],
"source": [
"# Blog-authorship corpus on the mounted Google Drive\n",
"data_path = 'drive/MyDrive/blogtext.csv'\n",
"# on_bad_lines='skip' replaces the deprecated error_bad_lines=False\n",
"# (this cell's stderr showed the pandas FutureWarning); the python engine\n",
"# is kept so malformed rows (NULL bytes, oversized fields) are skipped\n",
"# rather than aborting the read.\n",
"data = pd.read_csv(data_path, on_bad_lines='skip', engine='python')\n",
"# Keep only the first data_amount rows (configured in the first cell)\n",
"data = data[:data_amount]\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "84JKE7annp0B"
},
"outputs": [],
"source": [
"# Prefer the first GPU when CUDA is available, otherwise fall back to CPU.\n",
"device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
"if device != \"cpu\":\n",
"    # Release any cached allocations left over from a previous run\n",
"    torch.cuda.empty_cache()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KbKkY5Dbmzv3"
},
"source": [
"# Model typu encoder (BertForSequenceClassification)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "s4sgNo1rmzv5",
"outputId": "fbce79df-ecf4-4b7f-bc91-b430b7747ccc"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99\n",
"loading file https://huggingface.co/bert-base-uncased/resolve/main/added_tokens.json from cache at None\n",
"loading file https://huggingface.co/bert-base-uncased/resolve/main/special_tokens_map.json from cache at None\n",
"loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/c1d7f0a763fb63861cc08553866f1fc3e5a6f4f07621be277452d26d71303b7e.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79\n",
"loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4\n",
"loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"bert-base-uncased\",\n",
" \"architectures\": [\n",
" \"BertForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 0,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 30522\n",
"}\n",
"\n",
"loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e\n",
"Model config BertConfig {\n",
" \"architectures\": [\n",
" \"BertForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 0,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"problem_type\": \"multi_label_classification\",\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 30522\n",
"}\n",
"\n",
"loading weights file https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/a8041bf617d7f94ea26d15e218abd04afc2004805632abc0ed2066aa16d50d04.faf6ea826ae9c5867d12b22257f9877e6b8367890837bd60f7c54a29633f7f2f\n",
"Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
"- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"model_name = 'bert-base-uncased'\n",
"tokenizer = BertTokenizer.from_pretrained(model_name)\n",
"model = BertForSequenceClassification.from_pretrained(model_name, problem_type=\"multi_label_classification\", num_labels=4).to(device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 299
},
"id": "mzfWUCNomzv6",
"outputId": "190e4b3d-cb6e-4ca9-90cc-aa0816834b19"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAaIElEQVR4nO3df5DU933f8edLYJDla1CRlKsjsMAG6iKnIdUJ5MZ17qTYPiVxzm5RykUmeAaVOg6ZJK3i4ExCJMXpVJlU1B3LSS4VDUUqhwY1ztU+j+IadlynMgYiKQbZyAeSRsiyVThkeS0jjPXuH/thsl7t3S13++O+H16PmRt9f3x2v6/v1+fXfvnc7Z4iAjMzy9clnQ5gZmat5aI3M8uci97MLHMuejOzzLnozcwy56I3M8uci946TtIRSb2dztFJkt4v6VlJZUk/2ek8lhcXvbWUpKcl/UzNtg9K+uL59Yi4NiJKUzzPEkkhaW6LonbaHwObI6IrIh6tN0AVxyU90eZsVnAuejNgFryAXAMcmWLMO4EfBd4s6frWR7JcuOit46rv+iWtlnRQ0kuSviXpnjTsC+m/L6bpjbdLukTS70p6RtILkv67pAVVz/vLad8pSb9Xc5w7JO2RdL+kl4APpmM/IulFSc9L+oSkeVXPF5I+LOnrkr4j6Q8kvUXS/015H6weX3OOdbNKmi+pDMwBHpd0bJJLtQH4K2A0LVc//1JJX0i5/rekeyXdX7X/hpTzRUmPX+xTZRcbF73NNh8HPh4RPwK8BXgwbX9n+u/laXrjEeCD6asPeDPQBXwCQNJK4JPArcAbgQXA1TXHGgD2AJcDDwA/AH4TuBJ4O3AT8OGax7wHuA64AfgIMAR8AFgMvA0YnOC86maNiFcioiuN+YmIeEu9B0u6DFibcj4ArKt5UfkfwJeBK4A7gPVVj70a+AzwMWAhcDvwkKSrJshqmXHRWzt8Kt1JvijpRSoFPJHvA8skXRkR5Yj40iRjbwXuiYjjEVEGPkqlAOdSKcX/FRFfjIizwFag9oOdHomIT0XEqxHxvYg4FBFfiohzEfE08GfAT9c85o8i4qWIOAIcBv46Hf/bwGeBiX6QOlnWRvxL4BXgr6mU9uuAnwOQ9CbgemBrRJyNiC8CI1WP/QAwGhGj6Vw/BxwEfrbBY1vBueitHd4XEZef/+K1d8nVNgIrgK9JOiDp5ycZ+2PAM1XrzwBzge6079nzOyLiZeBUzeOfrV6RtELSpyV9M03n/Acqd/fVvlW1/L06613UN1nWRmwAHkwvQmeAh/j76ZsfA8bTOZ5XfW7XALfUvNi+g8q/dOwi0OkfQJn9kIj4OjAo6RIqd7F7JF3Ba+/GAb5BpcTOexNwjkr5Pg/84/M7JL2eyrTGDx2uZv1PgEeBwYj4jqTfoPIvg2aYLOukJC0CbgRWS/pXafNlwKWSrqRyrgslXVZV9ournuJZYGdE/JsZnoMVlO/obVaR9AFJV0XEq8CLafOrwP9L/31z1fBdwG+mH0R2UbkD3x0R56jMvb9X0j9Pc9l3AJri8P8AeAkoS3or8CvNOq8psk5lPfAklReuVelrBXCCyovSM1SmYu6QNE/S24H3Vj3+firX4j2S5ki6VFJvegGxi4CL3mabfuBI+k2UjwPr0vz5y8AfAn+Tph9uALYDO6n8Rs5TwBng1wDSHPqvAcNU7njLwAtU5rkncjvwS8B3gD8HdjfxvCbM2oANwCcj4pvVX8Cf8vfTN7dS+QHyKSo/dN1NOteIeJbKD55/h8oL5rPAb+H//1805D88YheDdBf9IrA8Ip7qdJ5Wk7Qb+FpE/H6ns1jn+RXdsiXpvZIuk/QGKu88/QrwdGdTtYak69Pv9F8iqZ/KHfynOp3LZgcXveVsgMoPQb8BLKcyDZTrP2H/EVCiMkX1X4BfmeijFOzi46kbM7PM+Y7ezCxzs+736K+88spYsmRJS4
/x3e9+lze84Q0tPUYrFTl/kbNDsfMXOTsUO387sh86dOhkRNT9WItZV/RLlizh4MGDLT1GqVSit7e3pcdopSLnL3J2KHb+ImeHYudvR3ZJz0y0z1M3ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZm3XvjDWbzZ58Eu68s9MppmdwsP3Z9+1r7/GsPt/Rm5llzkVvZpY5F72ZWeZc9GZmmXPRm5llzkVvZpY5F72ZWeZc9GZmmXPRm5llzkVvZpY5F72ZWeYaKnpJ/ZKOShqTtKXO/vmSdqf9+yUtSdtvlfRY1derklY19xTMzGwyUxa9pDnAvcDNwEpgUNLKmmEbgdMRsQzYBtwNEBEPRMSqiFgFrAeeiojHmnkCZmY2uUbu6FcDYxFxPCLOAsPAQM2YAWBHWt4D3CRJNWMG02PNzKyNFBGTD5DWAv0RcVtaXw+siYjNVWMOpzEn0vqxNOZk1ZhjwEBEHK5zjE3AJoDu7u7rhodb+3pQLpfp6upq6TFaqcj5i5wd4OTJMuPjxcy/cGH7s69Y0bznKvL3Tjuy9/X1HYqInnr72vJ59JLWAC/XK3mAiBgChgB6enqit7e3pXlKpRKtPkYrFTl/kbMDDA2V2LWrt9MxpmVwsP3Zm/l59EX+3ul09kambp4DFletL0rb6o6RNBdYAJyq2r8O2DX9mGZmNl2NFP0BYLmkpZLmUSntkZoxI8CGtLwW2BtpTkjSJcAv4vl5M7OOmHLqJiLOSdoMPAzMAbZHxBFJdwEHI2IEuA/YKWkMGKfyYnDeO4FnI+J48+ObmdlUGpqjj4hRYLRm29aq5TPALRM8tgTcMP2IZmY2E35nrJlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZa6joJfVLOippTNKWOvvnS9qd9u+XtKRq3z+V9IikI5K+IunS5sU3M7OpTFn0kuYA9wI3AyuBQUkra4ZtBE5HxDJgG3B3euxc4H7gQxFxLdALfL9p6c3MbEqN3NGvBsYi4nhEnAWGgYGaMQPAjrS8B7hJkoB3A38XEY8DRMSpiPhBc6KbmVkjGin6q4Fnq9ZPpG11x0TEOeDbwBXACiAkPSzpbyV9ZOaRzczsQsxtw/O/A7geeBn4vKRDEfH56kGSNgGbALq7uymVSi0NVS6XW36MVipy/iJnB1i4sMzgYKnTMaalE9mb+T91kb93Op29kaJ/Dlhctb4obas35kSal18AnKJy9/+FiDgJIGkU+GfADxV9RAwBQwA9PT3R29t7wSdyIUqlEq0+RisVOX+RswMMDZXYtau30zGmZXCw/dn37WvecxX5e6fT2RuZujkALJe0VNI8YB0wUjNmBNiQltcCeyMigIeBH5d0WXoB+GngieZENzOzRkx5Rx8R5yRtplLac4DtEXFE0l3AwYgYAe4DdkoaA8apvBgQEacl3UPlxSKA0Yj4TIvOxczM6mhojj4iRoHRmm1bq5bPALdM8Nj7qfyKpZmZdYDfGWtmljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZa6joJfVLOippTNKWOvvnS9qd9u+XtCRtXyLpe5IeS19/2tz4ZmY2lSn/OLikOcC9wLuAE8ABSSMR8UTVsI3A6YhYJmkdcDfwr9O+YxGxqsm5zcysQY3c0a8GxiLieEScBYaBgZoxA8COtLwHuEmSmhfTzMymSxEx+QBpLdAfEbel9fXAmojYXDXmcBpzIq0fA9YAXcAR4EngJeB3I+L/1DnGJmATQHd393XDw8NNOLWJlctlurq6WnqMVipy/iJnBzh5ss
z4eDHzL1zY/uwrVjTvuYr8vdOO7H19fYcioqfevimnbmboeeBNEXFK0nXApyRdGxEvVQ+KiCFgCKCnpyd6e3tbGqpUKtHqY7RSkfMXOTvA0FCJXbt6Ox1jWgYH2599377mPVeRv3c6nb2RqZvngMVV64vStrpjJM0FFgCnIuKViDgFEBGHgGNAE1/jzcxsKo0U/QFguaSlkuYB64CRmjEjwIa0vBbYGxEh6ar0w1wkvRlYDhxvTnQzM2vElFM3EXFO0mbgYWAOsD0ijki6CzgYESPAfcBOSWPAOJUXA4B3AndJ+j7wKvChiBhvxYmYmVl9Dc3RR8QoMFqzbWvV8hngljqPewh4aIYZzcxsBvzOWDOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy11DRS+qXdFTSmKQtdfbPl7Q77d8vaUnN/jdJKku6vTmxzcysUVMWvaQ5wL3AzcBKYFDSypphG4HTEbEM2AbcXbP/HuCzM49rZmYXqpE7+tXAWEQcj4izwDAwUDNmANiRlvcAN0kSgKT3AU8BR5oT2czMLoQiYvIB0lqgPyJuS+vrgTURsblqzOE05kRaPwasAc4AnwPeBdwOlCPij+scYxOwCaC7u/u64eHhJpzaxMrlMl1dXS09RisVOX+RswOcPFlmfLyY+RcubH/2FSua91xF/t5pR/a+vr5DEdFTb9/clh4Z7gC2RUQ53eDXFRFDwBBAT09P9Pb2tjRUqVSi1cdopSLnL3J2gKGhErt29XY6xrQMDrY/+759zXuuIn/vdDp7I0X/HLC4an1R2lZvzAlJc4EFwCkqd/VrJf0RcDnwqqQzEfGJGSc3M7OGNFL0B4DlkpZSKfR1wC/VjBkBNgCPAGuBvVGZE/oX5wdIuoPK1I1L3sysjaYs+og4J2kz8DAwB9geEUck3QUcjIgR4D5gp6QxYJzKi4GZmc0CDc3RR8QoMFqzbWvV8hnglime445p5DMzsxnyO2PNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscw0VvaR+SUcljUnaUmf/fEm70/79kpak7aslPZa+Hpf0/ubGNzOzqUxZ9JLmAPcCNwMrgUFJK2uGbQROR8QyYBtwd9p+GOiJiFVAP/Bnkhr6g+RmZtYcjdzRrwbGIuJ4RJwFhoGBmjEDwI60vAe4SZIi4uWIOJe2XwpEM0KbmVnjFDF590paC/RHxG1pfT2wJiI2V405nMacSOvH0piTktYA24FrgPUR8Zd1jrEJ2ATQ3d193fDwcFNObiLlcpmurq6WHqOVipy/yNkBTp4sMz5ezPwLF7Y/+4oVzXuuIn/vtCN7X1/foYjoqbev5dMoEbEfuFbSPwF2SPpsRJypGTMEDAH09PREb29vSzOVSiVafYxWKnL+ImcHGBoqsWtXb6djTMvgYPuz79vXvOcq8vdOp7M3MnXzHLC4an1R2lZ3TJqDXwCcqh4QEV8FysDbphvWzMwuXCNFfwBYLmmppHnAOmCkZswIsCEtrwX2RkSkx8wFkHQN8Fbg6aYkNzOzhkw5dRMR5yRtBh4G5gDbI+KIpLuAgxExAtwH7JQ0BoxTeTEAeAewRdL3gVeBD0fEyVaciJmZ1dfQHH1EjAKjNdu2Vi2fAW6p87idwM4ZZjQzsxnwO2PNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swsc/6zfhe5vr72Hm9wEO68s73HbKbBwU4nMLtwvqM3M8uci97MLHPZTd00MhWRw/RBkfObWXv5jt7MLHMuejOzzLnozcwy11
DRS+qXdFTSmKQtdfbPl7Q77d8vaUna/i5JhyR9Jf33xubGNzOzqUxZ9JLmAPcCNwMrgUFJK2uGbQROR8QyYBtwd9p+EnhvRPw4sAH//Vgzs7Zr5I5+NTAWEccj4iwwDAzUjBkAdqTlPcBNkhQRj0bEN9L2I8DrJc1vRnAzM2uMImLyAdJaoD8ibkvr64E1EbG5aszhNOZEWj+WxpyseZ4PRcTP1DnGJmATQHd393XDw8PTPqEnn5x6zMKFZcbHu6Z9jE4rcv4iZ4di5+9E9hUrmvdc5XKZrq5iXvt2ZO/r6zsUET319rXl9+glXUtlOufd9fZHxBAwBNDT0xO9vb3TPlYjv18+OFhi167pH6PTipy/yNmh2Pk7kX3fvuY9V6lUYibd0Emdzt7I1M1zwOKq9UVpW90xkuYCC4BTaX0R8JfAL0fEsZkGNjOzC9NI0R8AlktaKmkesA4YqRkzQuWHrQBrgb0REZIuBz4DbImIv2lWaDMza9yURR8R54DNwMPAV4EHI+KIpLsk/UIadh9whaQx4N8B538FczOwDNgq6bH09aNNPwszM5tQQ3P0ETEKjNZs21q1fAa4pc7jPgZ8bIYZzcxsBvzOWDOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy11DRS+qXdFTSmKQtdfbPl7Q77d8vaUnafoWkfZLKkj7R3OhmZtaIKYte0hzgXuBmYCUwKGllzbCNwOmIWAZsA+5O288Avwfc3rTEZmZ2QRq5o18NjEXE8Yg4CwwDAzVjBoAdaXkPcJMkRcR3I+KLVArfzMw6QBEx+QBpLdAfEbel9fXAmojYXDXmcBpzIq0fS2NOpvUPAj3Vj6k5xiZgE0B3d/d1w8PD0z6hJ5+ceszChWXGx7umfYxOK3L+ImeHYufvRPYVK5r3XOVyma6uYl77dmTv6+s7FBE99fbNbemRGxQRQ8AQQE9PT/T29k77ue68c+oxg4Mldu2a/jE6rcj5i5wdip2/E9n37Wvec5VKJWbSDZ3U6eyNTN08ByyuWl+UttUdI2kusAA41YyAZmY2M40U/QFguaSlkuYB64CRmjEjwIa0vBbYG1PNCZmZWVtMOXUTEeckbQYeBuYA2yPiiKS7gIMRMQLcB+yUNAaMU3kxAEDS08CPAPMkvQ94d0Q80fxTMTOzehqao4+IUWC0ZtvWquUzwC0TPHbJDPKZmdkM+Z2xZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZmxV/YcrM8tTX17znGhxs7C/IzUaNZm/mX+Sq5jt6M7PMuejNzDLnojczy5yL3swscy56M7PMNVT0kvolHZU0JmlLnf3zJe1O+/dLWlK176Np+1FJ72ledDMza8SURS9pDnAvcDOwEhiUtLJm2EbgdEQsA7YBd6fHrgTWAdcC/cAn0/OZmVmbNHJHvxoYi4jjEXEWGAYGasYMADvS8h7gJklK24cj4pWIeAoYS89nZmZt0sgbpq4Gnq1aPwGsmWhMRJyT9G3girT9SzWPvbr2AJI2AZvSalnS0YbST1OpxJXAyVYeo5WKnL/I2aHY+YucHYqdv9Hs0owOc81EO2bFO2MjYggYatfxJB2MiJ52Ha/Zipy/yNmh2PmLnB2Knb/T2RuZunkOWFy1vihtqztG0lxgAXCqwceamVkLNVL0B4DlkpZKmkflh6sjNWNGgA1peS2wNyIibV+XfitnKbAc+HJzopuZWSOmnLpJc+6bgYeBOcD2iDgi6S7gYESMAPcBOyWNAeNUXgxI4x4EngDOAb8aET9o0blciLZNE7VIkfMXOTsUO3+Rs0Ox83c0uyo33mZmliu/M9bMLHMuejOzzGVf9JK2S3pB0uGqbX
dIek7SY+nrZzuZcSKSFkvaJ+kJSUck/XravlDS5yR9Pf33H3Y6az2T5J/111/SpZK+LOnxlP3OtH1p+piPsfSxH/M6nbWeSfL/haSnqq79qk5nnYikOZIelfTptF6Ia39enfwdu/bZFz3wF1Q+fqHWtohYlb5G25ypUeeAfx8RK4EbgF9NHyuxBfh8RCwHPp/WZ6OJ8sPsv/6vADdGxE8Aq4B+STdQ+XiPbenjPk5T+fiP2Wii/AC/VXXtH+tcxCn9OvDVqvWiXPvzavNDh6599kUfEV+g8ptAhRMRz0fE36bl71D5prmaH/7IiR3A+zqTcHKT5J/1oqKcVl+XvgK4kcrHfMDsvvYT5S8ESYuAnwP+a1oXBbn28Nr8nZZ90U9is6S/S1M7s3Lqo1r6RNCfBPYD3RHxfNr1TaC7Q7EaVpMfCnD90z+9HwNeAD4HHANejIhzaUjdj/SYLWrzR8T5a/+H6dpvkzS/gxEn85+BjwCvpvUrKNC157X5z+vItb9Yi/5PgLdQ+Sft88B/6mycyUnqAh4CfiMiXqrel96YNqvv1OrkL8T1j4gfRMQqKu/oXg28tcORLkhtfklvAz5K5TyuBxYCv93BiHVJ+nnghYg41Oks0zFJ/o5d+4uy6CPiW+n/BK8Cf84s/kRNSa+jUpIPRMT/TJu/JemNaf8bqdyxzUr18hfp+gNExIvAPuDtwOXpYz6gIB/pUZW/P02nRUS8Avw3Zue1/yngFyQ9TeXTcm8EPk5xrv1r8ku6v5PX/qIs+vMlmbwfODzR2E5K85L3AV+NiHuqdlV/5MQG4K/ana0RE+UvwvWXdJWky9Py64F3UfkZwz4qH/MBs/va18v/taobBFGZ45511z4iPhoRiyJiCZV32e+NiFspyLWfIP8HOnntZ8WnV7aSpF1AL3ClpBPA7wO96VebAnga+LcdCzi5nwLWA19Jc60AvwP8R+BBSRuBZ4Bf7FC+qUyUf7AA1/+NwA5V/lDOJcCDEfFpSU8Aw5I+BjxK5YVsNpoo/15JVwECHgM+1MmQF+i3Kca1n8gDnbr2/ggEM7PMXZRTN2ZmFxMXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZ+//gbHXWr/GHYAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 10000x10000 with 0 Axes>"
]
},
"metadata": {}
}
],
"source": [
"# Create the figure BEFORE plotting: the original called plt.figure(figsize=(100,100))\n",
"# after hist()/grid(), which opened a second, empty 10000x10000 figure (visible in the\n",
"# cell output) instead of sizing this one.\n",
"plt.figure(figsize=(8, 6), dpi=100)\n",
"n, bins, patches = plt.hist(data['age'], 4, density=True, facecolor='b', alpha=0.75)\n",
"\n",
"plt.title('Histogram of Age')\n",
"plt.xlabel('age')\n",
"plt.ylabel('density')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "v6WbfLpimzv7",
"outputId": "309e9c1b-5730-4994-cd9a-59078ba1a2b2"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-a13e500d-34c9-4227-aecc-9d385a845574\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>topic</th>\n",
" <th>sign</th>\n",
" <th>date</th>\n",
" <th>text</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>14,May,2004</td>\n",
" <td>Info has been found (+/- 100 pages,...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>13,May,2004</td>\n",
" <td>These are the team members: Drewe...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>12,May,2004</td>\n",
" <td>In het kader van kernfusie op aarde...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>12,May,2004</td>\n",
" <td>testing!!! testing!!!</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3581210</td>\n",
" <td>male</td>\n",
" <td>33</td>\n",
" <td>InvestmentBanking</td>\n",
" <td>Aquarius</td>\n",
" <td>11,June,2004</td>\n",
" <td>Thanks to Yahoo!'s Toolbar I can ...</td>\n",
" <td>[0.0, 0.0, 1.0, 0.0]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a13e500d-34c9-4227-aecc-9d385a845574')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-a13e500d-34c9-4227-aecc-9d385a845574 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-a13e500d-34c9-4227-aecc-9d385a845574');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" id ... label\n",
"0 2059027 ... [1.0, 0.0, 0.0, 0.0]\n",
"1 2059027 ... [1.0, 0.0, 0.0, 0.0]\n",
"2 2059027 ... [1.0, 0.0, 0.0, 0.0]\n",
"3 2059027 ... [1.0, 0.0, 0.0, 0.0]\n",
"4 3581210 ... [0.0, 0.0, 1.0, 0.0]\n",
"\n",
"[5 rows x 8 columns]"
]
},
"metadata": {},
"execution_count": 104
}
],
"source": [
"\"\"\"\n",
"Age buckets (one-hot label):\n",
" 1 - 22 -> class 1\n",
"23 - 31 -> class 2\n",
"32 - 39 -> class 3\n",
"40 - 48 -> class 4\n",
"\"\"\"\n",
"def mapAgeToClass2(value: pd.Series) -> list:\n",
"  \"\"\"Map a row's 'age' to a one-hot vector over the four age buckets.\n",
"\n",
"  `value` is one DataFrame row (a Series), and the result is a list of four\n",
"  floats — the original `-> int` annotation was wrong.\n",
"  \"\"\"\n",
"  age = value['age']\n",
"  if age <= 22:\n",
"    return [1.0, 0.0, 0.0, 0.0]\n",
"  elif age <= 31:  # 23-31 (age > 22 is implied by the branch above)\n",
"    return [0.0, 1.0, 0.0, 0.0]\n",
"  elif age <= 39:  # 32-39\n",
"    return [0.0, 0.0, 1.0, 0.0]\n",
"  else:  # 40+\n",
"    return [0.0, 0.0, 0.0, 1.0]\n",
"\n",
"# Passing the function directly avoids the redundant `lambda row: f(row)` wrapper.\n",
"data['label'] = data.apply(mapAgeToClass2, axis=1)\n",
"data.head()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2CVdpLTWmzv8"
},
"outputs": [],
"source": [
"X = list(data['text'])\n",
"Y = list(data['label'])\n",
"\n",
"# Fix the split seed so the train/val membership is reproducible across re-runs;\n",
"# the Trainer below is also seeded with 0 but does not seed sklearn's split.\n",
"X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=0)\n",
"X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=512)\n",
"X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=512)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WAuTGUvKmzv9"
},
"outputs": [],
"source": [
"class Dataset(torch.utils.data.Dataset):\n",
"    \"\"\"Wrap tokenizer output (and optional labels) as a torch Dataset.\"\"\"\n",
"\n",
"    def __init__(self, encodings, labels=None):\n",
"        self.encodings = encodings\n",
"        self.labels = labels\n",
"\n",
"    def __getitem__(self, idx):\n",
"        # One example: every tokenizer field (input_ids, attention_mask, ...)\n",
"        # becomes a tensor slice at this index.\n",
"        item = {name: torch.tensor(field[idx]) for name, field in self.encodings.items()}\n",
"        if self.labels:\n",
"            item[\"labels\"] = torch.tensor(self.labels[idx])\n",
"        return item\n",
"\n",
"    def __len__(self):\n",
"        return len(self.encodings[\"input_ids\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PKJ5TCMTmzv-"
},
"outputs": [],
"source": [
"# Wrap the tokenized splits so the HF Trainer can iterate over them.\n",
"train_dataset = Dataset(X_train_tokenized, y_train)\n",
"val_dataset = Dataset(X_val_tokenized, y_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QDQ0m8Iomzv_"
},
"outputs": [],
"source": [
"def compute_metrics(p):\n",
"    \"\"\"Compute accuracy/precision/recall/F1 for the HF Trainer.\n",
"\n",
"    Predictions are logit vectors and labels are one-hot vectors, so both are\n",
"    collapsed to class indices with argmax before scoring. With micro\n",
"    averaging, precision/recall/F1 coincide with accuracy here.\n",
"    \"\"\"\n",
"    logits, onehot = p\n",
"    y_pred = np.argmax(logits, axis=1)\n",
"    y_true = np.argmax(onehot, axis=1)\n",
"\n",
"    return {\n",
"        \"accuracy\": accuracy_score(y_true=y_true, y_pred=y_pred),\n",
"        \"precision\": precision_score(y_true=y_true, y_pred=y_pred, average='micro'),\n",
"        \"recall\": recall_score(y_true=y_true, y_pred=y_pred, average='micro'),\n",
"        \"f1\": f1_score(y_true=y_true, y_pred=y_pred, average='micro'),\n",
"    }\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5gxNl9bvmzwB",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "13e561ad-9da6-4bde-b823-11a7ca7e6184"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
],
"source": [
"# Evaluate every 100 steps; keep the checkpoint with the best eval loss\n",
"# (load_best_model_at_end) and stop after 3 evaluations without improvement.\n",
"# NOTE(review): save_steps defaults to 500, a multiple of eval_steps=100, which\n",
"# load_best_model_at_end requires — confirm if either value is changed.\n",
"args = TrainingArguments(\n",
"    output_dir=\"output\",\n",
"    evaluation_strategy=\"steps\",\n",
"    eval_steps=100,\n",
"    per_device_train_batch_size=8,\n",
"    per_device_eval_batch_size=8,\n",
"    num_train_epochs=3,\n",
"    seed=0,\n",
"    load_best_model_at_end=True,\n",
")\n",
"trainer = Trainer(\n",
"    model=model,\n",
"    args=args,\n",
"    train_dataset=train_dataset,\n",
"    eval_dataset=val_dataset,\n",
"    compute_metrics=compute_metrics,\n",
"    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "dZ8FtrmnmzwB",
"outputId": "5cf81705-3255-4e0c-ee0e-fea255fe0c66"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 4000\n",
" Num Epochs = 3\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 1500\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1100' max='1500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1100/1500 12:04 < 04:23, 1.52 it/s, Epoch 2/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Accuracy</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" <th>F1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.308495</td>\n",
" <td>0.721000</td>\n",
" <td>0.721000</td>\n",
" <td>0.721000</td>\n",
" <td>0.721000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.267907</td>\n",
" <td>0.793000</td>\n",
" <td>0.793000</td>\n",
" <td>0.793000</td>\n",
" <td>0.793000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.246032</td>\n",
" <td>0.786000</td>\n",
" <td>0.786000</td>\n",
" <td>0.786000</td>\n",
" <td>0.786000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.235976</td>\n",
" <td>0.796000</td>\n",
" <td>0.796000</td>\n",
" <td>0.796000</td>\n",
" <td>0.796000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.297000</td>\n",
" <td>0.217070</td>\n",
" <td>0.830000</td>\n",
" <td>0.830000</td>\n",
" <td>0.830000</td>\n",
" <td>0.830000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.297000</td>\n",
" <td>0.232244</td>\n",
" <td>0.828000</td>\n",
" <td>0.828000</td>\n",
" <td>0.828000</td>\n",
" <td>0.828000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.297000</td>\n",
" <td>0.198891</td>\n",
" <td>0.853000</td>\n",
" <td>0.853000</td>\n",
" <td>0.853000</td>\n",
" <td>0.853000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.297000</td>\n",
" <td>0.202887</td>\n",
" <td>0.851000</td>\n",
" <td>0.851000</td>\n",
" <td>0.851000</td>\n",
" <td>0.851000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.297000</td>\n",
" <td>0.228751</td>\n",
" <td>0.847000</td>\n",
" <td>0.847000</td>\n",
" <td>0.847000</td>\n",
" <td>0.847000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.153700</td>\n",
" <td>0.221675</td>\n",
" <td>0.850000</td>\n",
" <td>0.850000</td>\n",
" <td>0.850000</td>\n",
" <td>0.850000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.153700</td>\n",
" <td>0.218299</td>\n",
" <td>0.866000</td>\n",
" <td>0.866000</td>\n",
" <td>0.866000</td>\n",
" <td>0.866000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-500\n",
"Configuration saved in output/checkpoint-500/config.json\n",
"Model weights saved in output/checkpoint-500/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-1000\n",
"Configuration saved in output/checkpoint-1000/config.json\n",
"Model weights saved in output/checkpoint-1000/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from output/checkpoint-500 (score: 0.21706973016262054).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=1100, training_loss=0.212534950429743, metrics={'train_runtime': 724.5874, 'train_samples_per_second': 16.561, 'train_steps_per_second': 2.07, 'total_flos': 2315418864844800.0, 'train_loss': 0.212534950429743, 'epoch': 2.2})"
]
},
"metadata": {},
"execution_count": 110
}
],
"source": [
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1FQxrdS9sGXZ",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 89
},
"outputId": "24d69727-de29-431a-85b8-f7a31c394c39"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Prediction *****\n",
" Num examples = 1000\n",
" Batch size = 8\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='125' max='125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [125/125 00:19]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
}
],
"source": [
"result = trainer.predict(val_dataset)"
]
},
{
"cell_type": "code",
"source": [
"print(result.metrics)"
],
"metadata": {
"id": "9QtUEeoVxJkt",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "077812cc-3848-4b56-ac52-a9368920819b"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'test_loss': 0.21706973016262054, 'test_accuracy': 0.83, 'test_precision': 0.83, 'test_recall': 0.83, 'test_f1': 0.83, 'test_runtime': 19.3166, 'test_samples_per_second': 51.769, 'test_steps_per_second': 6.471}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# NOTE(review): Trainer.save_model writes a DIRECTORY (config.json +\n",
"# pytorch_model.bin), not a pickle — the .pkl suffix is only a name here.\n",
"filename = 'model_encoder.pkl'\n",
"trainer.save_model(filename)"
],
"metadata": {
"id": "R3yrxs0ANvEQ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5ef8e1be-f0d5-4d76-a94f-f3eb29317a92"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model_encoder.pkl\n",
"Configuration saved in model_encoder.pkl/config.json\n",
"Model weights saved in model_encoder.pkl/pytorch_model.bin\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Z6ja8jhrmzwI"
},
"source": [
"# Model typu decoder"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tUf06zqBAwXG",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "df99ab7a-ea9b-4e7c-b121-3cfbbafb7347"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n",
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.47)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n",
"Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.5)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n"
]
}
],
"source": [
"!pip install transformers"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"id": "wble-rL7Q0Mk",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8eca782e-ab77-4f67-df5b-fb97b9bbe481"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "HBw75r5XBoui"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n",
"import torch\n",
"from transformers import TrainingArguments, Trainer\n",
"from transformers import EarlyStoppingCallback\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "AFgWRwlimzwJ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5ff5cd14-b8dd-425a-f9c2-5a29f9496870"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/1ee5ff92bf5d5e992fcf9006e19b6a4ad35d7c8564ef75f4d79a1ed2153273ff.bfdcc444ff249bca1a95ca170ec350b442f81804d7df3a95a2252217574121d7\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/0bc7fa46278c9aeb0db119eeac69668e732999ecf7e70938f7fabc0c50da0ed6.f5b91da9e34259b8f4d88dbc97c740667a0e8430b96314460cdb04e86d4fc435\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/added_tokens.json from cache at /root/.cache/huggingface/transformers/3f461190d4c3e4866b53ee0eb0cc229b7868d365099be2f8e40def2f56f64bd1.b2dabb9d6f1c7ea55d3c9c1c2037f316794ad095778dd06ae6a225cc74100b76\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/d599b9f7e2161f0d2e3b9c8fd9cebef8b07c938f69b08a0a42e78c584f1b4b1e.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/da65f1f02a542899b2b4e34dbc660a4afcad000d51ea419fc5fd6a227a122f5e.3f75ee48edc5dac7e53863302122c4a3cee3a14a708eca842a8f62714c185ca5\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/tokenizer.json from cache at None\n",
"loading configuration file https://huggingface.co/allenai/longformer-scico/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/7e93cc5a6217edd672fdad60f054eec31e6a815697de5adae6b921e1f75836a3.6890559d1ffa3ad48d00eee0ae70669ec630881b293df48322fe4c28c7088c35\n",
"Model config LongformerConfig {\n",
" \"_name_or_path\": \"allenai/longformer-scico\",\n",
" \"architectures\": [\n",
" \"LongformerForSequenceClassification\"\n",
" ],\n",
" \"attention_mode\": \"longformer\",\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"attention_window\": [\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512\n",
" ],\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"not related\",\n",
" \"1\": \"coref\",\n",
" \"2\": \"parent\",\n",
" \"3\": \"child\"\n",
" },\n",
" \"ignore_attention_mask\": false,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"child\": \"3\",\n",
" \"coref\": \"1\",\n",
" \"not related\": \"0\",\n",
" \"parent\": \"2\"\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 4098,\n",
" \"model_type\": \"longformer\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"sep_token_id\": 2,\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50267\n",
"}\n",
"\n",
"Adding <m> to the vocabulary\n",
"Adding </m> to the vocabulary\n",
"loading configuration file https://huggingface.co/allenai/longformer-scico/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/7e93cc5a6217edd672fdad60f054eec31e6a815697de5adae6b921e1f75836a3.6890559d1ffa3ad48d00eee0ae70669ec630881b293df48322fe4c28c7088c35\n",
"Model config LongformerConfig {\n",
" \"_name_or_path\": \"allenai/longformer-base-4096\",\n",
" \"architectures\": [\n",
" \"LongformerForSequenceClassification\"\n",
" ],\n",
" \"attention_mode\": \"longformer\",\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"attention_window\": [\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512\n",
" ],\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"not related\",\n",
" \"1\": \"coref\",\n",
" \"2\": \"parent\",\n",
" \"3\": \"child\"\n",
" },\n",
" \"ignore_attention_mask\": false,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"child\": \"3\",\n",
" \"coref\": \"1\",\n",
" \"not related\": \"0\",\n",
" \"parent\": \"2\"\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 4098,\n",
" \"model_type\": \"longformer\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"problem_type\": \"multi_label_classification\",\n",
" \"sep_token_id\": 2,\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50267\n",
"}\n",
"\n",
"loading weights file https://huggingface.co/allenai/longformer-scico/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/33709a0b0a44102dd29583428fe5253bf07cbd1ed163757382d471017620ad4d.6fd6d3de002d054747c1a5eb1e2b33e56924ad6db478547c9cf616d11dd48609\n",
"All model checkpoint weights were used when initializing LongformerForSequenceClassification.\n",
"\n",
"All the weights of LongformerForSequenceClassification were initialized from the model checkpoint at allenai/longformer-scico.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use LongformerForSequenceClassification for predictions without further training.\n",
"/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
"\n",
"\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
"Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 31753: field larger than field limit (131072)\n",
"Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 275624: field larger than field limit (131072)\n",
"Skipping line 302668: field larger than field limit (131072)\n",
"Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 384761: field larger than field limit (131072)\n",
"Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 401260: field larger than field limit (131072)\n",
"Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 465419: field larger than field limit (131072)\n",
"Skipping line 466152: field larger than field limit (131072)\n",
"Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 529874: field larger than field limit (131072)\n",
"Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 589855: field larger than field limit (131072)\n",
"Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 632882: field larger than field limit (131072)\n",
"Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 658667: field larger than field limit (131072)\n"
]
}
],
"source": [
"from transformers import LongformerTokenizer, LongformerForSequenceClassification, LongformerConfig\n",
"\n",
"model_name = \"allenai/longformer-scico\"\n",
"tokenizer = LongformerTokenizer.from_pretrained(model_name)\n",
"# BUG FIX: the original code built LongformerConfig(attention_window=32) and then\n",
"# called .from_pretrained() on a model *instance*, which silently discarded that\n",
"# config and reloaded the checkpoint with its default config -- the training logs\n",
"# confirm attention_window=512 was actually used. from_pretrained is a\n",
"# classmethod; call it on the class directly so the code matches the behavior.\n",
"model = LongformerForSequenceClassification.from_pretrained(model_name, problem_type=\"multi_label_classification\")\n",
"\n",
"# Age buckets mapped to one-hot 4-class vectors:\n",
"#    1-22 -> class 1\n",
"#   23-31 -> class 2\n",
"#   32-39 -> class 3\n",
"#   40-48 -> class 4\n",
"\n",
"def mapAgeToClass2(value: pd.Series) -> list:\n",
"    \"\"\"Map a row's 'age' field to a one-hot 4-class label vector.\"\"\"\n",
"    age = value['age']\n",
"    if age <= 22:\n",
"        return [1.0, 0.0, 0.0, 0.0]\n",
"    elif age <= 31:\n",
"        return [0.0, 1.0, 0.0, 0.0]\n",
"    elif age <= 39:\n",
"        return [0.0, 0.0, 1.0, 0.0]\n",
"    else:\n",
"        return [0.0, 0.0, 0.0, 1.0]\n",
"\n",
"data_path = 'drive/MyDrive/blogtext.csv'\n",
"\n",
"# The dump contains NULL bytes and oversized fields (see the skipped-line log\n",
"# above); error_bad_lines=False drops those rows instead of aborting the read.\n",
"data = pd.read_csv(data_path, error_bad_lines=False, engine='python')\n",
"data = data[:data_amount]\n",
"data['label'] = data.apply(mapAgeToClass2, axis=1)\n",
"\n",
"X = list(data['text'])\n",
"Y = list(data['label'])\n",
"# FIX: `device` used to be undefined on CPU-only machines; default it first.\n",
"device = \"cpu\"\n",
"if torch.cuda.is_available():\n",
"    device = \"cuda:0\"\n",
"    torch.cuda.empty_cache()\n",
"\n",
"# random_state pinned so the split (and the reported metrics) are reproducible.\n",
"X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=0)\n",
"\n",
"X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=128)\n",
"X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=128)\n",
"\n",
"class Dataset(torch.utils.data.Dataset):\n",
"    \"\"\"Wraps tokenizer encodings (and optional labels) as a torch Dataset.\"\"\"\n",
"\n",
"    def __init__(self, encodings, labels=None):\n",
"        self.encodings = encodings\n",
"        self.labels = labels\n",
"\n",
"    def __getitem__(self, idx):\n",
"        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n",
"        # `is not None` rather than truthiness, so an empty label list is honoured.\n",
"        if self.labels is not None:\n",
"            item[\"labels\"] = torch.tensor(self.labels[idx])\n",
"        return item\n",
"\n",
"    def __len__(self):\n",
"        return len(self.encodings[\"input_ids\"])\n",
"\n",
"train_dataset = Dataset(X_train_tokenized, y_train)\n",
"val_dataset = Dataset(X_val_tokenized, y_val)\n",
"\n",
"def compute_metrics(p):\n",
"    \"\"\"Trainer metrics hook: argmax one-hot labels/logits, report micro-averaged P/R/F1.\"\"\"\n",
"    pred, labels = p\n",
"    pred = np.argmax(pred, axis=1)\n",
"    labels = np.argmax(labels, axis=1)\n",
"\n",
"    accuracy = accuracy_score(y_true=labels, y_pred=pred)\n",
"    recall = recall_score(y_true=labels, y_pred=pred, average='micro')\n",
"    precision = precision_score(y_true=labels, y_pred=pred, average='micro')\n",
"    f1 = f1_score(y_true=labels, y_pred=pred, average='micro')\n",
"\n",
"    return {\"accuracy\": accuracy, \"precision\": precision, \"recall\": recall, \"f1\": f1}\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "b-3I70XlmzwK",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "89edfdfb-2462-4bce-ab20-9d9572388be3"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
],
"source": [
"args = TrainingArguments(\n",
"    output_dir=\"output\",\n",
"    evaluation_strategy=\"steps\",\n",
"    eval_steps=100,\n",
"    # BUG FIX: with the default save_steps=500, load_best_model_at_end could only\n",
"    # choose among checkpoints written every 500 steps, so 4 out of 5 evaluated\n",
"    # models were never candidates for \"best\". Checkpoint at the same cadence\n",
"    # as evaluation so every evaluated model is eligible.\n",
"    save_strategy=\"steps\",\n",
"    save_steps=100,\n",
"    per_device_train_batch_size=8,\n",
"    per_device_eval_batch_size=8,\n",
"    num_train_epochs=3,\n",
"    seed=0,\n",
"    load_best_model_at_end=True\n",
")\n",
"trainer = Trainer(\n",
"    model=model,\n",
"    args=args,\n",
"    train_dataset=train_dataset,\n",
"    eval_dataset=val_dataset,\n",
"    compute_metrics=compute_metrics,\n",
"    # stop if eval loss fails to improve for 3 consecutive evaluations\n",
"    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "4lvaWP9RmzwK",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "a170114b-47ff-4080-ab78-164a0dcc3c15"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 4000\n",
" Num Epochs = 3\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 1500\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1500' max='1500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1500/1500 35:53, Epoch 3/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Accuracy</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" <th>F1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.407021</td>\n",
" <td>0.625000</td>\n",
" <td>0.625000</td>\n",
" <td>0.625000</td>\n",
" <td>0.625000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.333797</td>\n",
" <td>0.690000</td>\n",
" <td>0.690000</td>\n",
" <td>0.690000</td>\n",
" <td>0.690000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.403388</td>\n",
" <td>0.644000</td>\n",
" <td>0.644000</td>\n",
" <td>0.644000</td>\n",
" <td>0.644000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.296055</td>\n",
" <td>0.747000</td>\n",
" <td>0.747000</td>\n",
" <td>0.747000</td>\n",
" <td>0.747000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.370100</td>\n",
" <td>0.318152</td>\n",
" <td>0.713000</td>\n",
" <td>0.713000</td>\n",
" <td>0.713000</td>\n",
" <td>0.713000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.370100</td>\n",
" <td>0.301799</td>\n",
" <td>0.740000</td>\n",
" <td>0.740000</td>\n",
" <td>0.740000</td>\n",
" <td>0.740000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.370100</td>\n",
" <td>0.295635</td>\n",
" <td>0.715000</td>\n",
" <td>0.715000</td>\n",
" <td>0.715000</td>\n",
" <td>0.715000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.370100</td>\n",
" <td>0.268345</td>\n",
" <td>0.765000</td>\n",
" <td>0.765000</td>\n",
" <td>0.765000</td>\n",
" <td>0.765000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.370100</td>\n",
" <td>0.282199</td>\n",
" <td>0.753000</td>\n",
" <td>0.753000</td>\n",
" <td>0.753000</td>\n",
" <td>0.753000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.294600</td>\n",
" <td>0.265310</td>\n",
" <td>0.788000</td>\n",
" <td>0.788000</td>\n",
" <td>0.788000</td>\n",
" <td>0.788000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.294600</td>\n",
" <td>0.268466</td>\n",
" <td>0.789000</td>\n",
" <td>0.789000</td>\n",
" <td>0.789000</td>\n",
" <td>0.789000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1200</td>\n",
" <td>0.294600</td>\n",
" <td>0.245028</td>\n",
" <td>0.804000</td>\n",
" <td>0.804000</td>\n",
" <td>0.804000</td>\n",
" <td>0.804000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1300</td>\n",
" <td>0.294600</td>\n",
" <td>0.260589</td>\n",
" <td>0.808000</td>\n",
" <td>0.808000</td>\n",
" <td>0.808000</td>\n",
" <td>0.808000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1400</td>\n",
" <td>0.294600</td>\n",
" <td>0.247587</td>\n",
" <td>0.807000</td>\n",
" <td>0.807000</td>\n",
" <td>0.807000</td>\n",
" <td>0.807000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1500</td>\n",
" <td>0.213700</td>\n",
" <td>0.242638</td>\n",
" <td>0.824000</td>\n",
" <td>0.824000</td>\n",
" <td>0.824000</td>\n",
" <td>0.824000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.\u001b[0m\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Saving model checkpoint to output/checkpoint-500\n",
"Configuration saved in output/checkpoint-500/config.json\n",
"Model weights saved in output/checkpoint-500/pytorch_model.bin\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Saving model checkpoint to output/checkpoint-1000\n",
"Configuration saved in output/checkpoint-1000/config.json\n",
"Model weights saved in output/checkpoint-1000/pytorch_model.bin\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Saving model checkpoint to output/checkpoint-1500\n",
"Configuration saved in output/checkpoint-1500/config.json\n",
"Model weights saved in output/checkpoint-1500/pytorch_model.bin\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from output/checkpoint-1500 (score: 0.24263811111450195).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=1500, training_loss=0.29278671264648437, metrics={'train_runtime': 2154.3722, 'train_samples_per_second': 5.57, 'train_steps_per_second': 0.696, 'total_flos': 985291591680000.0, 'train_loss': 0.29278671264648437, 'epoch': 3.0})"
]
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"trainer.train()"
]
},
{
"cell_type": "code",
"source": [
"result = trainer.predict(val_dataset)"
],
"metadata": {
"id": "YTeHJ_c6I2iy",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "f2e1ab2b-807c-473c-f73a-0e0815b1b4a8"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Prediction *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='125' max='125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [125/125 00:37]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(result.metrics)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RWdJsGGYz9-p",
"outputId": "ab22383a-3ac4-4832-800d-f1a5415b173c"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'test_loss': 0.24263811111450195, 'test_accuracy': 0.824, 'test_precision': 0.824, 'test_recall': 0.824, 'test_f1': 0.824, 'test_runtime': 38.0024, 'test_samples_per_second': 26.314, 'test_steps_per_second': 3.289}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"filename='model_decoder'\n",
"trainer.save_model(filename)"
],
"metadata": {
"id": "SSAnGmAXZGsT",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e443c6a4-cf9b-4448-8e6d-eb816d4435ee"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model_decoder\n",
"Configuration saved in model_decoder/config.json\n",
"Model weights saved in model_decoder/pytorch_model.bin\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ESiFpWJYzcgC"
},
"source": [
"# Model typu encoder-decoder"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WYz-wVPoz_tJ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e25586d5-4b1c-4588-efde-1da9f0a18cfd"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: sentencepiece==0.1.91 in /usr/local/lib/python3.7/dist-packages (0.1.91)\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n",
"Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.5)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n",
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.47)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n"
]
}
],
"source": [
"!pip install sentencepiece==0.1.91\n",
"!pip install transformers"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"id": "phhvsUnGYC-o"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ylv54tLmBX6a"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n",
"import torch\n",
"from transformers import TrainingArguments, Trainer\n",
"from transformers import EarlyStoppingCallback\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_-KcWxdqzgF2",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "44fe6f02-c75b-47d1-f856-866f2fd8c51f"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading file https://huggingface.co/t5-small/resolve/main/spiece.model from cache at /root/.cache/huggingface/transformers/65fc04e21f45f61430aea0c4fedffac16a4d20d78b8e6601d8d996ebefefecd2.3b69006860e7b5d0a63ffdddc01ddcd6b7c318a6f4fd793596552c741734c62d\n",
"loading file https://huggingface.co/t5-small/resolve/main/added_tokens.json from cache at None\n",
"loading file https://huggingface.co/t5-small/resolve/main/special_tokens_map.json from cache at None\n",
"loading file https://huggingface.co/t5-small/resolve/main/tokenizer_config.json from cache at None\n",
"loading file https://huggingface.co/t5-small/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/06779097c78e12f47ef67ecb728810c2ae757ee0a9efe9390c6419783d99382d.8627f1bd5d270a9fd2e5a51c8bec3223896587cc3cfe13edeabb0992ab43c529\n",
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
"Model config T5Config {\n",
" \"_name_or_path\": \"t5-small\",\n",
" \"architectures\": [\n",
" \"T5WithLMHeadModel\"\n",
" ],\n",
" \"d_ff\": 2048,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"relu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"n_positions\": 512,\n",
" \"num_decoder_layers\": 6,\n",
" \"num_heads\": 8,\n",
" \"num_layers\": 6,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"task_specific_params\": {\n",
" \"summarization\": {\n",
" \"early_stopping\": true,\n",
" \"length_penalty\": 2.0,\n",
" \"max_length\": 200,\n",
" \"min_length\": 30,\n",
" \"no_repeat_ngram_size\": 3,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"summarize: \"\n",
" },\n",
" \"translation_en_to_de\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to German: \"\n",
" },\n",
" \"translation_en_to_fr\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to French: \"\n",
" },\n",
" \"translation_en_to_ro\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to Romanian: \"\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
"Model config T5Config {\n",
" \"architectures\": [\n",
" \"T5WithLMHeadModel\"\n",
" ],\n",
" \"d_ff\": 2048,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"relu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"n_positions\": 512,\n",
" \"num_decoder_layers\": 6,\n",
" \"num_heads\": 8,\n",
" \"num_layers\": 6,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"task_specific_params\": {\n",
" \"summarization\": {\n",
" \"early_stopping\": true,\n",
" \"length_penalty\": 2.0,\n",
" \"max_length\": 200,\n",
" \"min_length\": 30,\n",
" \"no_repeat_ngram_size\": 3,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"summarize: \"\n",
" },\n",
" \"translation_en_to_de\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to German: \"\n",
" },\n",
" \"translation_en_to_fr\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to French: \"\n",
" },\n",
" \"translation_en_to_ro\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to Romanian: \"\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"loading weights file https://huggingface.co/t5-small/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/fee5a3a0ae379232608b6eed45d2d7a0d2966b9683728838412caccc41b4b0ed.ddacdc89ec88482db20c676f0861a336f3d0409f94748c209847b49529d73885\n",
"All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
"\n",
"All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at t5-small.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
"/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
"\n",
"\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
"Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 31753: field larger than field limit (131072)\n",
"Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 275624: field larger than field limit (131072)\n",
"Skipping line 302668: field larger than field limit (131072)\n",
"Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 384761: field larger than field limit (131072)\n",
"Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 401260: field larger than field limit (131072)\n",
"Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 465419: field larger than field limit (131072)\n",
"Skipping line 466152: field larger than field limit (131072)\n",
"Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 529874: field larger than field limit (131072)\n",
"Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 589855: field larger than field limit (131072)\n",
"Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 632882: field larger than field limit (131072)\n",
"Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 658667: field larger than field limit (131072)\n"
]
}
],
"source": [
"from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
"from transformers import EvalPrediction\n",
"\n",
"model_name = \"t5-small\"\n",
"tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
"model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
"\n",
"\"\"\"\n",
"1 - 22 -> 1 klasa\n",
"23 - 31 -> 2 klasa\n",
"32 - 39 -> 3 klasa \n",
"40 - 48 -> 4 klasa\n",
"\"\"\"\n",
"\n",
"def mapAgeToClass2(value: pd.DataFrame):\n",
" if(value['age'] <=22):\n",
" return 'class1'\n",
" elif(value['age'] > 22 and value['age'] <= 31):\n",
" return 'class2'\n",
" elif(value['age'] > 31 and value['age'] <= 39):\n",
" return 'class3'\n",
" else:\n",
" return 'class4'\n",
"\n",
"data_path = 'drive/MyDrive/blogtext.csv'\n",
"\n",
"data = pd.read_csv(data_path, error_bad_lines=False, engine='python')\n",
"data = data[:data_amount]\n",
"data['label'] = data.apply(lambda row: mapAgeToClass2(row), axis=1)\n",
"\n",
"\n",
"X = list(data['text'])\n",
"Y = list(data['label'])\n",
"if (torch.cuda.is_available()):\n",
" device = \"cuda:0\"\n",
" torch.cuda.empty_cache()\n",
"\n",
"\n",
"X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2)\n",
"\n",
"X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=1024)\n",
"X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=1024)\n",
"\n",
"class Dataset(torch.utils.data.Dataset):\n",
" def __init__(self, encodings, labels=None):\n",
" self.encodings = encodings\n",
" self.labels = labels\n",
"\n",
" def __getitem__(self, idx):\n",
" item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n",
" if self.labels:\n",
" item[\"labels\"] = torch.tensor(tokenizer(self.labels[idx])['input_ids'])\n",
" return item\n",
"\n",
" def __len__(self):\n",
" return len(self.encodings[\"input_ids\"])\n",
"\n",
"train_dataset = Dataset(X_train_tokenized, y_train)\n",
"val_dataset = Dataset(X_val_tokenized, y_val)\n",
"\n",
"def compute_metrics(pred):\n",
" labels_ids = pred.label_ids\n",
" pred_ids = pred.predictions\n",
"\n",
" pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)\n",
" label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)\n",
"\n",
" accuracy = sum([int(pred_str[i] == label_str[i]) for i in range(len(pred_str))]) / len(pred_str)\n",
"\n",
" return {\"accuracy\": accuracy}\n",
"\n"
]
},
{
"cell_type": "code",
"source": [
"from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
"\n",
"args = Seq2SeqTrainingArguments(\n",
" output_dir=\"output\",\n",
" evaluation_strategy=\"steps\",\n",
" eval_steps=50,\n",
" per_device_train_batch_size=8,\n",
" per_device_eval_batch_size=8,\n",
" num_train_epochs=3,\n",
" seed=0,\n",
" load_best_model_at_end=True,\n",
" predict_with_generate=True\n",
")\n",
"\n",
"trainer = Seq2SeqTrainer(\n",
" model=model,\n",
" args=args,\n",
" train_dataset=train_dataset,\n",
" eval_dataset=val_dataset,\n",
" compute_metrics=compute_metrics\n",
")"
],
"metadata": {
"id": "XayaHmAMgI1x",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4c32a772-96bc-4a43-b406-110c5f311932"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8nVY24TCz1Mi",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "b4542048-d208-463a-b088-df9645f8b92d"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 4000\n",
" Num Epochs = 3\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 1500\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1500' max='1500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1500/1500 32:13, Epoch 3/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>50</td>\n",
" <td>No log</td>\n",
" <td>2.898511</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.437433</td>\n",
" <td>0.601000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>150</td>\n",
" <td>No log</td>\n",
" <td>0.301548</td>\n",
" <td>0.645000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.278892</td>\n",
" <td>0.668000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>250</td>\n",
" <td>No log</td>\n",
" <td>0.270245</td>\n",
" <td>0.686000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.286085</td>\n",
" <td>0.663000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>350</td>\n",
" <td>No log</td>\n",
" <td>0.262066</td>\n",
" <td>0.708000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.257251</td>\n",
" <td>0.697000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>450</td>\n",
" <td>No log</td>\n",
" <td>0.252848</td>\n",
" <td>0.710000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>1.057600</td>\n",
" <td>0.248504</td>\n",
" <td>0.701000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>550</td>\n",
" <td>1.057600</td>\n",
" <td>0.251563</td>\n",
" <td>0.721000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>1.057600</td>\n",
" <td>0.239508</td>\n",
" <td>0.731000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>650</td>\n",
" <td>1.057600</td>\n",
" <td>0.235462</td>\n",
" <td>0.738000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>1.057600</td>\n",
" <td>0.246152</td>\n",
" <td>0.734000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>750</td>\n",
" <td>1.057600</td>\n",
" <td>0.237433</td>\n",
" <td>0.733000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>1.057600</td>\n",
" <td>0.234127</td>\n",
" <td>0.752000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>850</td>\n",
" <td>1.057600</td>\n",
" <td>0.224785</td>\n",
" <td>0.760000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>1.057600</td>\n",
" <td>0.222618</td>\n",
" <td>0.747000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>950</td>\n",
" <td>1.057600</td>\n",
" <td>0.217110</td>\n",
" <td>0.770000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.266600</td>\n",
" <td>0.214305</td>\n",
" <td>0.765000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1050</td>\n",
" <td>0.266600</td>\n",
" <td>0.213813</td>\n",
" <td>0.771000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.266600</td>\n",
" <td>0.212208</td>\n",
" <td>0.774000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1150</td>\n",
" <td>0.266600</td>\n",
" <td>0.211007</td>\n",
" <td>0.772000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1200</td>\n",
" <td>0.266600</td>\n",
" <td>0.210451</td>\n",
" <td>0.768000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1250</td>\n",
" <td>0.266600</td>\n",
" <td>0.210460</td>\n",
" <td>0.768000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1300</td>\n",
" <td>0.266600</td>\n",
" <td>0.214561</td>\n",
" <td>0.769000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1350</td>\n",
" <td>0.266600</td>\n",
" <td>0.210450</td>\n",
" <td>0.767000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1400</td>\n",
" <td>0.266600</td>\n",
" <td>0.209276</td>\n",
" <td>0.767000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1450</td>\n",
" <td>0.266600</td>\n",
" <td>0.210069</td>\n",
" <td>0.769000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1500</td>\n",
" <td>0.244700</td>\n",
" <td>0.210056</td>\n",
" <td>0.766000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-500\n",
"Configuration saved in output/checkpoint-500/config.json\n",
"Model weights saved in output/checkpoint-500/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-1000\n",
"Configuration saved in output/checkpoint-1000/config.json\n",
"Model weights saved in output/checkpoint-1000/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-1500\n",
"Configuration saved in output/checkpoint-1500/config.json\n",
"Model weights saved in output/checkpoint-1500/pytorch_model.bin\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from output/checkpoint-1500 (score: 0.2100560963153839).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=1500, training_loss=0.5229549509684245, metrics={'train_runtime': 1934.1295, 'train_samples_per_second': 6.204, 'train_steps_per_second': 0.776, 'total_flos': 3248203235328000.0, 'train_loss': 0.5229549509684245, 'epoch': 3.0})"
]
},
"metadata": {},
"execution_count": 20
}
],
"source": [
"# Fine-tune the model; trainer is a Hugging Face transformers Trainer\n",
"# configured in an earlier cell. Returns a TrainOutput with the final\n",
"# training loss and runtime metrics (shown in the cell output below).\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"source": [
"# Run inference over the validation split with the fine-tuned model.\n",
"# result exposes .metrics (test_loss, test_accuracy, timing), printed\n",
"# in the next cell. val_dataset is defined in an earlier cell.\n",
"result = trainer.predict(val_dataset)"
],
"metadata": {
"id": "yBrHzXzhaKvk",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 89
},
"outputId": "92a6c3ea-4695-4dcf-fe71-9621fadc9906"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Prediction *****\n",
" Num examples = 1000\n",
" Batch size = 8\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='125' max='125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [125/125 00:33]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"# Display the aggregate prediction metrics: test_loss, test_accuracy,\n",
"# and runtime/throughput statistics from the predict() call above.\n",
"print(result.metrics)"
],
"metadata": {
"id": "nzm2vx86llKw",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6be67f3e-043c-423c-c81a-8686d59a656e"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'test_loss': 0.2100560963153839, 'test_accuracy': 0.766, 'test_runtime': 45.1374, 'test_samples_per_second': 22.155, 'test_steps_per_second': 2.769}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Persist the fine-tuned model (config.json + pytorch_model.bin) to the\n",
"# local 'model_encoder_decoder' directory, as shown in the log below.\n",
"filename='model_encoder_decoder'\n",
"trainer.save_model(filename)"
],
"metadata": {
"id": "LWpjAH_YaL66",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e5c07d60-c441-4dc1-8ce0-66e815823a68"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model_encoder_decoder\n",
"Configuration saved in model_encoder_decoder/config.json\n",
"Model weights saved in model_encoder_decoder/pytorch_model.bin\n"
]
}
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "main.ipynb",
"provenance": [],
"machine_shape": "hm"
},
"interpreter": {
"hash": "f4394274b6de412f99b9d08dfb473204abc12afd5637ebb20c9ad8dbd67e97a0"
},
"kernelspec": {
"display_name": "Python 3.10.1 64-bit ('venv': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}