projekt_glebokie/main.ipynb
wangobango 5c4e2db8ab done
2022-02-19 17:34:58 +01:00

8041 lines
602 KiB
Plaintext
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"source": [
"data_amount = 5000"
],
"metadata": {
"id": "oTWhQK1Aw-J7"
},
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aCSHYmqxnJmd",
"outputId": "5e66d5f2-7e6e-4495-ca88-ccbd0c64ac33"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting transformers\n",
" Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)\n",
"\u001b[K |████████████████████████████████| 3.5 MB 4.3 MB/s \n",
"\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n",
"Collecting tokenizers!=0.11.3,>=0.10.1\n",
" Downloading tokenizers-0.11.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)\n",
"\u001b[K |████████████████████████████████| 6.8 MB 64.5 MB/s \n",
"\u001b[?25hCollecting sacremoses\n",
" Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)\n",
"\u001b[K |████████████████████████████████| 895 kB 58.2 MB/s \n",
"\u001b[?25hCollecting huggingface-hub<1.0,>=0.1.0\n",
" Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)\n",
"\u001b[K |████████████████████████████████| 67 kB 7.8 MB/s \n",
"\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
"Collecting pyyaml>=5.1\n",
" Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n",
"\u001b[K |████████████████████████████████| 596 kB 60.4 MB/s \n",
"\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
"Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers\n",
" Attempting uninstall: pyyaml\n",
" Found existing installation: PyYAML 3.13\n",
" Uninstalling PyYAML-3.13:\n",
" Successfully uninstalled PyYAML-3.13\n",
"Successfully installed huggingface-hub-0.4.0 pyyaml-6.0 sacremoses-0.0.47 tokenizers-0.11.5 transformers-4.16.2\n"
]
}
],
"source": [
"# Use %pip (targets this kernel's environment, unlike !pip) and pin the\n",
"# version for reproducibility — 4.16.2 is what this notebook was run with\n",
"# (see the recorded install output of this cell).\n",
"%pip install -q transformers==4.16.2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Jk99LQjzmzvw"
},
"outputs": [],
"source": [
"# Data handling and plotting\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Modelling: splits/metrics, torch backend, HuggingFace transformers\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n",
"import torch\n",
"from transformers import TrainingArguments, Trainer\n",
"from transformers import BertTokenizer, BertForSequenceClassification\n",
"from transformers import EarlyStoppingCallback"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "wb-zBvXlaTAO",
"outputId": "62a128ce-8b64-404a-a462-b9c92e350246"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "jFTxtzzKmzv1",
"outputId": "afc4b505-8f43-484b-dbad-20f9e97fe37d"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
"\n",
"\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
"Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 31753: field larger than field limit (131072)\n",
"Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 275624: field larger than field limit (131072)\n",
"Skipping line 302668: field larger than field limit (131072)\n",
"Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 384761: field larger than field limit (131072)\n",
"Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 401260: field larger than field limit (131072)\n",
"Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 465419: field larger than field limit (131072)\n",
"Skipping line 466152: field larger than field limit (131072)\n",
"Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 529874: field larger than field limit (131072)\n",
"Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 589855: field larger than field limit (131072)\n",
"Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 632882: field larger than field limit (131072)\n",
"Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 658667: field larger than field limit (131072)\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-95a8d503-58ea-4f94-ba7f-46ac042b6219\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>topic</th>\n",
" <th>sign</th>\n",
" <th>date</th>\n",
" <th>text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>14,May,2004</td>\n",
" <td>Info has been found (+/- 100 pages,...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>13,May,2004</td>\n",
" <td>These are the team members: Drewe...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>12,May,2004</td>\n",
" <td>In het kader van kernfusie op aarde...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>12,May,2004</td>\n",
" <td>testing!!! testing!!!</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3581210</td>\n",
" <td>male</td>\n",
" <td>33</td>\n",
" <td>InvestmentBanking</td>\n",
" <td>Aquarius</td>\n",
" <td>11,June,2004</td>\n",
" <td>Thanks to Yahoo!'s Toolbar I can ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-95a8d503-58ea-4f94-ba7f-46ac042b6219')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-95a8d503-58ea-4f94-ba7f-46ac042b6219 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-95a8d503-58ea-4f94-ba7f-46ac042b6219');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" id ... text\n",
"0 2059027 ... Info has been found (+/- 100 pages,...\n",
"1 2059027 ... These are the team members: Drewe...\n",
"2 2059027 ... In het kader van kernfusie op aarde...\n",
"3 2059027 ... testing!!! testing!!! \n",
"4 3581210 ... Thanks to Yahoo!'s Toolbar I can ...\n",
"\n",
"[5 rows x 7 columns]"
]
},
"metadata": {},
"execution_count": 100
}
],
"source": [
"# Blog-authorship corpus on the mounted Google Drive\n",
"data_path = 'drive/MyDrive/blogtext.csv'\n",
"# on_bad_lines='skip' replaces the deprecated error_bad_lines=False\n",
"# (this cell's stderr showed the pandas FutureWarning); the python engine\n",
"# is kept so malformed rows (NULL bytes, oversized fields) are skipped\n",
"# rather than aborting the read.\n",
"data = pd.read_csv(data_path, on_bad_lines='skip', engine='python')\n",
"# Keep only the first data_amount rows (configured in the first cell)\n",
"data = data[:data_amount]\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "84JKE7annp0B"
},
"outputs": [],
"source": [
"# Prefer the first GPU when CUDA is available, otherwise fall back to CPU.\n",
"device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
"if device != \"cpu\":\n",
"    # Release any cached allocations left over from a previous run\n",
"    torch.cuda.empty_cache()"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "KbKkY5Dbmzv3"
},
"source": [
"# Model typu encoder (BertForSequenceClassification)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "s4sgNo1rmzv5",
"outputId": "fbce79df-ecf4-4b7f-bc91-b430b7747ccc"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99\n",
"loading file https://huggingface.co/bert-base-uncased/resolve/main/added_tokens.json from cache at None\n",
"loading file https://huggingface.co/bert-base-uncased/resolve/main/special_tokens_map.json from cache at None\n",
"loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/c1d7f0a763fb63861cc08553866f1fc3e5a6f4f07621be277452d26d71303b7e.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79\n",
"loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4\n",
"loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e\n",
"Model config BertConfig {\n",
" \"_name_or_path\": \"bert-base-uncased\",\n",
" \"architectures\": [\n",
" \"BertForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 0,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 30522\n",
"}\n",
"\n",
"loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e\n",
"Model config BertConfig {\n",
" \"architectures\": [\n",
" \"BertForMaskedLM\"\n",
" ],\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"classifier_dropout\": null,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3\n",
" },\n",
" \"layer_norm_eps\": 1e-12,\n",
" \"max_position_embeddings\": 512,\n",
" \"model_type\": \"bert\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 0,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"problem_type\": \"multi_label_classification\",\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"type_vocab_size\": 2,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 30522\n",
"}\n",
"\n",
"loading weights file https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/a8041bf617d7f94ea26d15e218abd04afc2004805632abc0ed2066aa16d50d04.faf6ea826ae9c5867d12b22257f9877e6b8367890837bd60f7c54a29633f7f2f\n",
"Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
"- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"model_name = 'bert-base-uncased'\n",
"tokenizer = BertTokenizer.from_pretrained(model_name)\n",
"model = BertForSequenceClassification.from_pretrained(model_name, problem_type=\"multi_label_classification\", num_labels=4).to(device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 299
},
"id": "mzfWUCNomzv6",
"outputId": "190e4b3d-cb6e-4ca9-90cc-aa0816834b19"
},
"outputs": [
{
"output_type": "display_data",
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAaIElEQVR4nO3df5DU933f8edLYJDla1CRlKsjsMAG6iKnIdUJ5MZ17qTYPiVxzm5RykUmeAaVOg6ZJK3i4ExCJMXpVJlU1B3LSS4VDUUqhwY1ztU+j+IadlynMgYiKQbZyAeSRsiyVThkeS0jjPXuH/thsl7t3S13++O+H16PmRt9f3x2v6/v1+fXfvnc7Z4iAjMzy9clnQ5gZmat5aI3M8uci97MLHMuejOzzLnozcwy56I3M8uci946TtIRSb2dztFJkt4v6VlJZUk/2ek8lhcXvbWUpKcl/UzNtg9K+uL59Yi4NiJKUzzPEkkhaW6LonbaHwObI6IrIh6tN0AVxyU90eZsVnAuejNgFryAXAMcmWLMO4EfBd4s6frWR7JcuOit46rv+iWtlnRQ0kuSviXpnjTsC+m/L6bpjbdLukTS70p6RtILkv67pAVVz/vLad8pSb9Xc5w7JO2RdL+kl4APpmM/IulFSc9L+oSkeVXPF5I+LOnrkr4j6Q8kvUXS/015H6weX3OOdbNKmi+pDMwBHpd0bJJLtQH4K2A0LVc//1JJX0i5/rekeyXdX7X/hpTzRUmPX+xTZRcbF73NNh8HPh4RPwK8BXgwbX9n+u/laXrjEeCD6asPeDPQBXwCQNJK4JPArcAbgQXA1TXHGgD2AJcDDwA/AH4TuBJ4O3AT8OGax7wHuA64AfgIMAR8AFgMvA0YnOC86maNiFcioiuN+YmIeEu9B0u6DFibcj4ArKt5UfkfwJeBK4A7gPVVj70a+AzwMWAhcDvwkKSrJshqmXHRWzt8Kt1JvijpRSoFPJHvA8skXRkR5Yj40iRjbwXuiYjjEVEGPkqlAOdSKcX/FRFfjIizwFag9oOdHomIT0XEqxHxvYg4FBFfiohzEfE08GfAT9c85o8i4qWIOAIcBv46Hf/bwGeBiX6QOlnWRvxL4BXgr6mU9uuAnwOQ9CbgemBrRJyNiC8CI1WP/QAwGhGj6Vw/BxwEfrbBY1vBueitHd4XEZef/+K1d8nVNgIrgK9JOiDp5ycZ+2PAM1XrzwBzge6079nzOyLiZeBUzeOfrV6RtELSpyV9M03n/Acqd/fVvlW1/L06613UN1nWRmwAHkwvQmeAh/j76ZsfA8bTOZ5XfW7XALfUvNi+g8q/dOwi0OkfQJn9kIj4OjAo6RIqd7F7JF3Ba+/GAb5BpcTOexNwjkr5Pg/84/M7JL2eyrTGDx2uZv1PgEeBwYj4jqTfoPIvg2aYLOukJC0CbgRWS/pXafNlwKWSrqRyrgslXVZV9ournuJZYGdE/JsZnoMVlO/obVaR9AFJV0XEq8CLafOrwP9L/31z1fBdwG+mH0R2UbkD3x0R56jMvb9X0j9Pc9l3AJri8P8AeAkoS3or8CvNOq8psk5lPfAklReuVelrBXCCyovSM1SmYu6QNE/S24H3Vj3+firX4j2S5ki6VFJvegGxi4CL3mabfuBI+k2UjwPr0vz5y8AfAn+Tph9uALYDO6n8Rs5TwBng1wDSHPqvAcNU7njLwAtU5rkncjvwS8B3gD8HdjfxvCbM2oANwCcj4pvVX8Cf8vfTN7dS+QHyKSo/dN1NOteIeJbKD55/h8oL5rPAb+H//1805D88YheDdBf9IrA8Ip7qdJ5Wk7Qb+FpE/H6ns1jn+RXdsiXpvZIuk/QGKu88/QrwdGdTtYak69Pv9F8iqZ/KHfynOp3LZgcXveVsgMoPQb8BLKcyDZTrP2H/EVCiMkX1X4BfmeijFOzi46kbM7PM+Y7ezCxzs+736K+88spYsmRJS4
/x3e9+lze84Q0tPUYrFTl/kbNDsfMXOTsUO387sh86dOhkRNT9WItZV/RLlizh4MGDLT1GqVSit7e3pcdopSLnL3J2KHb+ImeHYudvR3ZJz0y0z1M3ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZm3XvjDWbzZ58Eu68s9MppmdwsP3Z9+1r7/GsPt/Rm5llzkVvZpY5F72ZWeZc9GZmmXPRm5llzkVvZpY5F72ZWeZc9GZmmXPRm5llzkVvZpY5F72ZWeYaKnpJ/ZKOShqTtKXO/vmSdqf9+yUtSdtvlfRY1derklY19xTMzGwyUxa9pDnAvcDNwEpgUNLKmmEbgdMRsQzYBtwNEBEPRMSqiFgFrAeeiojHmnkCZmY2uUbu6FcDYxFxPCLOAsPAQM2YAWBHWt4D3CRJNWMG02PNzKyNFBGTD5DWAv0RcVtaXw+siYjNVWMOpzEn0vqxNOZk1ZhjwEBEHK5zjE3AJoDu7u7rhodb+3pQLpfp6upq6TFaqcj5i5wd4OTJMuPjxcy/cGH7s69Y0bznKvL3Tjuy9/X1HYqInnr72vJ59JLWAC/XK3mAiBgChgB6enqit7e3pXlKpRKtPkYrFTl/kbMDDA2V2LWrt9MxpmVwsP3Zm/l59EX+3ul09kambp4DFletL0rb6o6RNBdYAJyq2r8O2DX9mGZmNl2NFP0BYLmkpZLmUSntkZoxI8CGtLwW2BtpTkjSJcAv4vl5M7OOmHLqJiLOSdoMPAzMAbZHxBFJdwEHI2IEuA/YKWkMGKfyYnDeO4FnI+J48+ObmdlUGpqjj4hRYLRm29aq5TPALRM8tgTcMP2IZmY2E35nrJlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZa6joJfVLOippTNKWOvvnS9qd9u+XtKRq3z+V9IikI5K+IunS5sU3M7OpTFn0kuYA9wI3AyuBQUkra4ZtBE5HxDJgG3B3euxc4H7gQxFxLdALfL9p6c3MbEqN3NGvBsYi4nhEnAWGgYGaMQPAjrS8B7hJkoB3A38XEY8DRMSpiPhBc6KbmVkjGin6q4Fnq9ZPpG11x0TEOeDbwBXACiAkPSzpbyV9ZOaRzczsQsxtw/O/A7geeBn4vKRDEfH56kGSNgGbALq7uymVSi0NVS6XW36MVipy/iJnB1i4sMzgYKnTMaalE9mb+T91kb93Op29kaJ/Dlhctb4obas35kSal18AnKJy9/+FiDgJIGkU+GfADxV9RAwBQwA9PT3R29t7wSdyIUqlEq0+RisVOX+RswMMDZXYtau30zGmZXCw/dn37WvecxX5e6fT2RuZujkALJe0VNI8YB0wUjNmBNiQltcCeyMigIeBH5d0WXoB+GngieZENzOzRkx5Rx8R5yRtplLac4DtEXFE0l3AwYgYAe4DdkoaA8apvBgQEacl3UPlxSKA0Yj4TIvOxczM6mhojj4iRoHRmm1bq5bPALdM8Nj7qfyKpZmZdYDfGWtmljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZa6joJfVLOippTNKWOvvnS9qd9u+XtCRtXyLpe5IeS19/2tz4ZmY2lSn/OLikOcC9wLuAE8ABSSMR8UTVsI3A6YhYJmkdcDfwr9O+YxGxqsm5zcysQY3c0a8GxiLieEScBYaBgZoxA8COtLwHuEmSmhfTzMymSxEx+QBpLdAfEbel9fXAmojYXDXmcBpzIq0fA9YAXcAR4EngJeB3I+L/1DnGJmATQHd393XDw8NNOLWJlctlurq6WnqMVipy/iJnBzh5ss
z4eDHzL1zY/uwrVjTvuYr8vdOO7H19fYcioqfevimnbmboeeBNEXFK0nXApyRdGxEvVQ+KiCFgCKCnpyd6e3tbGqpUKtHqY7RSkfMXOTvA0FCJXbt6Ox1jWgYH2599377mPVeRv3c6nb2RqZvngMVV64vStrpjJM0FFgCnIuKViDgFEBGHgGNAE1/jzcxsKo0U/QFguaSlkuYB64CRmjEjwIa0vBbYGxEh6ar0w1wkvRlYDhxvTnQzM2vElFM3EXFO0mbgYWAOsD0ijki6CzgYESPAfcBOSWPAOJUXA4B3AndJ+j7wKvChiBhvxYmYmVl9Dc3RR8QoMFqzbWvV8hngljqPewh4aIYZzcxsBvzOWDOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy11DRS+qXdFTSmKQtdfbPl7Q77d8vaUnN/jdJKku6vTmxzcysUVMWvaQ5wL3AzcBKYFDSypphG4HTEbEM2AbcXbP/HuCzM49rZmYXqpE7+tXAWEQcj4izwDAwUDNmANiRlvcAN0kSgKT3AU8BR5oT2czMLoQiYvIB0lqgPyJuS+vrgTURsblqzOE05kRaPwasAc4AnwPeBdwOlCPij+scYxOwCaC7u/u64eHhJpzaxMrlMl1dXS09RisVOX+RswOcPFlmfLyY+RcubH/2FSua91xF/t5pR/a+vr5DEdFTb9/clh4Z7gC2RUQ53eDXFRFDwBBAT09P9Pb2tjRUqVSi1cdopSLnL3J2gKGhErt29XY6xrQMDrY/+759zXuuIn/vdDp7I0X/HLC4an1R2lZvzAlJc4EFwCkqd/VrJf0RcDnwqqQzEfGJGSc3M7OGNFL0B4DlkpZSKfR1wC/VjBkBNgCPAGuBvVGZE/oX5wdIuoPK1I1L3sysjaYs+og4J2kz8DAwB9geEUck3QUcjIgR4D5gp6QxYJzKi4GZmc0CDc3RR8QoMFqzbWvV8hnglime445p5DMzsxnyO2PNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscw0VvaR+SUcljUnaUmf/fEm70/79kpak7aslPZa+Hpf0/ubGNzOzqUxZ9JLmAPcCNwMrgUFJK2uGbQROR8QyYBtwd9p+GOiJiFVAP/Bnkhr6g+RmZtYcjdzRrwbGIuJ4RJwFhoGBmjEDwI60vAe4SZIi4uWIOJe2XwpEM0KbmVnjFDF590paC/RHxG1pfT2wJiI2V405nMacSOvH0piTktYA24FrgPUR8Zd1jrEJ2ATQ3d193fDwcFNObiLlcpmurq6WHqOVipy/yNkBTp4sMz5ezPwLF7Y/+4oVzXuuIn/vtCN7X1/foYjoqbev5dMoEbEfuFbSPwF2SPpsRJypGTMEDAH09PREb29vSzOVSiVafYxWKnL+ImcHGBoqsWtXb6djTMvgYPuz79vXvOcq8vdOp7M3MnXzHLC4an1R2lZ3TJqDXwCcqh4QEV8FysDbphvWzMwuXCNFfwBYLmmppHnAOmCkZswIsCEtrwX2RkSkx8wFkHQN8Fbg6aYkNzOzhkw5dRMR5yRtBh4G5gDbI+KIpLuAgxExAtwH7JQ0BoxTeTEAeAewRdL3gVeBD0fEyVaciJmZ1dfQHH1EjAKjNdu2Vi2fAW6p87idwM4ZZjQzsxnwO2PNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swsc/6zfhe5vr72Hm9wEO68s73HbKbBwU4nMLtwvqM3M8uci97MLHPZTd00MhWRw/RBkfObWXv5jt7MLHMuejOzzLnozcwy11
DRS+qXdFTSmKQtdfbPl7Q77d8vaUna/i5JhyR9Jf33xubGNzOzqUxZ9JLmAPcCNwMrgUFJK2uGbQROR8QyYBtwd9p+EnhvRPw4sAH//Vgzs7Zr5I5+NTAWEccj4iwwDAzUjBkAdqTlPcBNkhQRj0bEN9L2I8DrJc1vRnAzM2uMImLyAdJaoD8ibkvr64E1EbG5aszhNOZEWj+WxpyseZ4PRcTP1DnGJmATQHd393XDw8PTPqEnn5x6zMKFZcbHu6Z9jE4rcv4iZ4di5+9E9hUrmvdc5XKZrq5iXvt2ZO/r6zsUET319rXl9+glXUtlOufd9fZHxBAwBNDT0xO9vb3TPlYjv18+OFhi167pH6PTipy/yNmh2Pk7kX3fvuY9V6lUYibd0Emdzt7I1M1zwOKq9UVpW90xkuYCC4BTaX0R8JfAL0fEsZkGNjOzC9NI0R8AlktaKmkesA4YqRkzQuWHrQBrgb0REZIuBz4DbImIv2lWaDMza9yURR8R54DNwMPAV4EHI+KIpLsk/UIadh9whaQx4N8B538FczOwDNgq6bH09aNNPwszM5tQQ3P0ETEKjNZs21q1fAa4pc7jPgZ8bIYZzcxsBvzOWDOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy11DRS+qXdFTSmKQtdfbPl7Q77d8vaUnafoWkfZLKkj7R3OhmZtaIKYte0hzgXuBmYCUwKGllzbCNwOmIWAZsA+5O288Avwfc3rTEZmZ2QRq5o18NjEXE8Yg4CwwDAzVjBoAdaXkPcJMkRcR3I+KLVArfzMw6QBEx+QBpLdAfEbel9fXAmojYXDXmcBpzIq0fS2NOpvUPAj3Vj6k5xiZgE0B3d/d1w8PD0z6hJ5+ceszChWXGx7umfYxOK3L+ImeHYufvRPYVK5r3XOVyma6uYl77dmTv6+s7FBE99fbNbemRGxQRQ8AQQE9PT/T29k77ue68c+oxg4Mldu2a/jE6rcj5i5wdip2/E9n37Wvec5VKJWbSDZ3U6eyNTN08ByyuWl+UttUdI2kusAA41YyAZmY2M40U/QFguaSlkuYB64CRmjEjwIa0vBbYG1PNCZmZWVtMOXUTEeckbQYeBuYA2yPiiKS7gIMRMQLcB+yUNAaMU3kxAEDS08CPAPMkvQ94d0Q80fxTMTOzehqao4+IUWC0ZtvWquUzwC0TPHbJDPKZmdkM+Z2xZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZmxV/YcrM8tTX17znGhxs7C/IzUaNZm/mX+Sq5jt6M7PMuejNzDLnojczy5yL3swscy56M7PMNVT0kvolHZU0JmlLnf3zJe1O+/dLWlK176Np+1FJ72ledDMza8SURS9pDnAvcDOwEhiUtLJm2EbgdEQsA7YBd6fHrgTWAdcC/cAn0/OZmVmbNHJHvxoYi4jjEXEWGAYGasYMADvS8h7gJklK24cj4pWIeAoYS89nZmZt0sgbpq4Gnq1aPwGsmWhMRJyT9G3girT9SzWPvbr2AJI2AZvSalnS0YbST1OpxJXAyVYeo5WKnL/I2aHY+YucHYqdv9Hs0owOc81EO2bFO2MjYggYatfxJB2MiJ52Ha/Zipy/yNmh2PmLnB2Knb/T2RuZunkOWFy1vihtqztG0lxgAXCqwceamVkLNVL0B4DlkpZKmkflh6sjNWNGgA1peS2wNyIibV+XfitnKbAc+HJzopuZWSOmnLpJc+6bgYeBOcD2iDgi6S7gYESMAPcBOyWNAeNUXgxI4x4EngDOAb8aET9o0blciLZNE7VIkfMXOTsUO3+Rs0Ox83c0uyo33mZmliu/M9bMLHMuejOzzGVf9JK2S3pB0uGqbX
dIek7SY+nrZzuZcSKSFkvaJ+kJSUck/XravlDS5yR9Pf33H3Y6az2T5J/111/SpZK+LOnxlP3OtH1p+piPsfSxH/M6nbWeSfL/haSnqq79qk5nnYikOZIelfTptF6Ia39enfwdu/bZFz3wF1Q+fqHWtohYlb5G25ypUeeAfx8RK4EbgF9NHyuxBfh8RCwHPp/WZ6OJ8sPsv/6vADdGxE8Aq4B+STdQ+XiPbenjPk5T+fiP2Wii/AC/VXXtH+tcxCn9OvDVqvWiXPvzavNDh6599kUfEV+g8ptAhRMRz0fE36bl71D5prmaH/7IiR3A+zqTcHKT5J/1oqKcVl+XvgK4kcrHfMDsvvYT5S8ESYuAnwP+a1oXBbn28Nr8nZZ90U9is6S/S1M7s3Lqo1r6RNCfBPYD3RHxfNr1TaC7Q7EaVpMfCnD90z+9HwNeAD4HHANejIhzaUjdj/SYLWrzR8T5a/+H6dpvkzS/gxEn85+BjwCvpvUrKNC157X5z+vItb9Yi/5PgLdQ+Sft88B/6mycyUnqAh4CfiMiXqrel96YNqvv1OrkL8T1j4gfRMQqKu/oXg28tcORLkhtfklvAz5K5TyuBxYCv93BiHVJ+nnghYg41Oks0zFJ/o5d+4uy6CPiW+n/BK8Cf84s/kRNSa+jUpIPRMT/TJu/JemNaf8bqdyxzUr18hfp+gNExIvAPuDtwOXpYz6gIB/pUZW/P02nRUS8Avw3Zue1/yngFyQ9TeXTcm8EPk5xrv1r8ku6v5PX/qIs+vMlmbwfODzR2E5K85L3AV+NiHuqdlV/5MQG4K/ana0RE+UvwvWXdJWky9Py64F3UfkZwz4qH/MBs/va18v/taobBFGZ45511z4iPhoRiyJiCZV32e+NiFspyLWfIP8HOnntZ8WnV7aSpF1AL3ClpBPA7wO96VebAnga+LcdCzi5nwLWA19Jc60AvwP8R+BBSRuBZ4Bf7FC+qUyUf7AA1/+NwA5V/lDOJcCDEfFpSU8Aw5I+BjxK5YVsNpoo/15JVwECHgM+1MmQF+i3Kca1n8gDnbr2/ggEM7PMXZRTN2ZmFxMXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZ+//gbHXWr/GHYAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
}
},
{
"output_type": "display_data",
"data": {
"text/plain": [
"<Figure size 10000x10000 with 0 Axes>"
]
},
"metadata": {}
}
],
"source": [
"# Create the figure BEFORE plotting: the original called plt.figure(figsize=(100,100))\n",
"# after hist()/grid(), which opened a second, empty 10000x10000 figure (visible in the\n",
"# cell output) instead of sizing this one.\n",
"plt.figure(figsize=(8, 6), dpi=100)\n",
"n, bins, patches = plt.hist(data['age'], 4, density=True, facecolor='b', alpha=0.75)\n",
"\n",
"plt.title('Histogram of Age')\n",
"plt.xlabel('age')\n",
"plt.ylabel('density')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 206
},
"id": "v6WbfLpimzv7",
"outputId": "309e9c1b-5730-4994-cd9a-59078ba1a2b2"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"\n",
" <div id=\"df-a13e500d-34c9-4227-aecc-9d385a845574\">\n",
" <div class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>topic</th>\n",
" <th>sign</th>\n",
" <th>date</th>\n",
" <th>text</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>14,May,2004</td>\n",
" <td>Info has been found (+/- 100 pages,...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>13,May,2004</td>\n",
" <td>These are the team members: Drewe...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>12,May,2004</td>\n",
" <td>In het kader van kernfusie op aarde...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2059027</td>\n",
" <td>male</td>\n",
" <td>15</td>\n",
" <td>Student</td>\n",
" <td>Leo</td>\n",
" <td>12,May,2004</td>\n",
" <td>testing!!! testing!!!</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>3581210</td>\n",
" <td>male</td>\n",
" <td>33</td>\n",
" <td>InvestmentBanking</td>\n",
" <td>Aquarius</td>\n",
" <td>11,June,2004</td>\n",
" <td>Thanks to Yahoo!'s Toolbar I can ...</td>\n",
" <td>[0.0, 0.0, 1.0, 0.0]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a13e500d-34c9-4227-aecc-9d385a845574')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
" \n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
" </svg>\n",
" </button>\n",
" \n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" flex-wrap:wrap;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-a13e500d-34c9-4227-aecc-9d385a845574 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-a13e500d-34c9-4227-aecc-9d385a845574');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
" </div>\n",
" "
],
"text/plain": [
" id ... label\n",
"0 2059027 ... [1.0, 0.0, 0.0, 0.0]\n",
"1 2059027 ... [1.0, 0.0, 0.0, 0.0]\n",
"2 2059027 ... [1.0, 0.0, 0.0, 0.0]\n",
"3 2059027 ... [1.0, 0.0, 0.0, 0.0]\n",
"4 3581210 ... [0.0, 0.0, 1.0, 0.0]\n",
"\n",
"[5 rows x 8 columns]"
]
},
"metadata": {},
"execution_count": 104
}
],
"source": [
"\"\"\"\n",
"Age buckets (one-hot label):\n",
" 1 - 22 -> class 1\n",
"23 - 31 -> class 2\n",
"32 - 39 -> class 3\n",
"40 - 48 -> class 4\n",
"\"\"\"\n",
"def mapAgeToClass2(value: pd.Series) -> list:\n",
"  \"\"\"Map a row's 'age' to a one-hot vector over the four age buckets.\n",
"\n",
"  `value` is one DataFrame row (a Series), and the result is a list of four\n",
"  floats — the original `-> int` annotation was wrong.\n",
"  \"\"\"\n",
"  age = value['age']\n",
"  if age <= 22:\n",
"    return [1.0, 0.0, 0.0, 0.0]\n",
"  elif age <= 31:  # 23-31 (age > 22 is implied by the branch above)\n",
"    return [0.0, 1.0, 0.0, 0.0]\n",
"  elif age <= 39:  # 32-39\n",
"    return [0.0, 0.0, 1.0, 0.0]\n",
"  else:  # 40+\n",
"    return [0.0, 0.0, 0.0, 1.0]\n",
"\n",
"# Passing the function directly avoids the redundant `lambda row: f(row)` wrapper.\n",
"data['label'] = data.apply(mapAgeToClass2, axis=1)\n",
"data.head()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "2CVdpLTWmzv8"
},
"outputs": [],
"source": [
"X = list(data['text'])\n",
"Y = list(data['label'])\n",
"\n",
"# Fix the split seed so the train/val membership is reproducible across re-runs;\n",
"# the Trainer below is also seeded with 0 but does not seed sklearn's split.\n",
"X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=0)\n",
"X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=512)\n",
"X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=512)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WAuTGUvKmzv9"
},
"outputs": [],
"source": [
"class Dataset(torch.utils.data.Dataset):\n",
"    \"\"\"Wrap tokenizer output (and optional labels) as a torch Dataset.\"\"\"\n",
"\n",
"    def __init__(self, encodings, labels=None):\n",
"        self.encodings = encodings\n",
"        self.labels = labels\n",
"\n",
"    def __getitem__(self, idx):\n",
"        # One example: every tokenizer field (input_ids, attention_mask, ...)\n",
"        # becomes a tensor slice at this index.\n",
"        item = {name: torch.tensor(field[idx]) for name, field in self.encodings.items()}\n",
"        if self.labels:\n",
"            item[\"labels\"] = torch.tensor(self.labels[idx])\n",
"        return item\n",
"\n",
"    def __len__(self):\n",
"        return len(self.encodings[\"input_ids\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "PKJ5TCMTmzv-"
},
"outputs": [],
"source": [
"# Wrap the tokenized splits so the HF Trainer can iterate over them.\n",
"train_dataset = Dataset(X_train_tokenized, y_train)\n",
"val_dataset = Dataset(X_val_tokenized, y_val)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "QDQ0m8Iomzv_"
},
"outputs": [],
"source": [
"def compute_metrics(p):\n",
"    \"\"\"Compute accuracy/precision/recall/F1 for the HF Trainer.\n",
"\n",
"    Predictions are logit vectors and labels are one-hot vectors, so both are\n",
"    collapsed to class indices with argmax before scoring. With micro\n",
"    averaging, precision/recall/F1 coincide with accuracy here.\n",
"    \"\"\"\n",
"    logits, onehot = p\n",
"    y_pred = np.argmax(logits, axis=1)\n",
"    y_true = np.argmax(onehot, axis=1)\n",
"\n",
"    return {\n",
"        \"accuracy\": accuracy_score(y_true=y_true, y_pred=y_pred),\n",
"        \"precision\": precision_score(y_true=y_true, y_pred=y_pred, average='micro'),\n",
"        \"recall\": recall_score(y_true=y_true, y_pred=y_pred, average='micro'),\n",
"        \"f1\": f1_score(y_true=y_true, y_pred=y_pred, average='micro'),\n",
"    }\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "5gxNl9bvmzwB",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "13e561ad-9da6-4bde-b823-11a7ca7e6184"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
],
"source": [
"# Evaluate every 100 steps; keep the checkpoint with the best eval loss\n",
"# (load_best_model_at_end) and stop after 3 evaluations without improvement.\n",
"# NOTE(review): save_steps defaults to 500, a multiple of eval_steps=100, which\n",
"# load_best_model_at_end requires — confirm if either value is changed.\n",
"args = TrainingArguments(\n",
"    output_dir=\"output\",\n",
"    evaluation_strategy=\"steps\",\n",
"    eval_steps=100,\n",
"    per_device_train_batch_size=8,\n",
"    per_device_eval_batch_size=8,\n",
"    num_train_epochs=3,\n",
"    seed=0,\n",
"    load_best_model_at_end=True,\n",
")\n",
"trainer = Trainer(\n",
"    model=model,\n",
"    args=args,\n",
"    train_dataset=train_dataset,\n",
"    eval_dataset=val_dataset,\n",
"    compute_metrics=compute_metrics,\n",
"    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "dZ8FtrmnmzwB",
"outputId": "5cf81705-3255-4e0c-ee0e-fea255fe0c66"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 4000\n",
" Num Epochs = 3\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 1500\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1100' max='1500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1100/1500 12:04 < 04:23, 1.52 it/s, Epoch 2/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Accuracy</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" <th>F1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.308495</td>\n",
" <td>0.721000</td>\n",
" <td>0.721000</td>\n",
" <td>0.721000</td>\n",
" <td>0.721000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.267907</td>\n",
" <td>0.793000</td>\n",
" <td>0.793000</td>\n",
" <td>0.793000</td>\n",
" <td>0.793000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.246032</td>\n",
" <td>0.786000</td>\n",
" <td>0.786000</td>\n",
" <td>0.786000</td>\n",
" <td>0.786000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.235976</td>\n",
" <td>0.796000</td>\n",
" <td>0.796000</td>\n",
" <td>0.796000</td>\n",
" <td>0.796000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.297000</td>\n",
" <td>0.217070</td>\n",
" <td>0.830000</td>\n",
" <td>0.830000</td>\n",
" <td>0.830000</td>\n",
" <td>0.830000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.297000</td>\n",
" <td>0.232244</td>\n",
" <td>0.828000</td>\n",
" <td>0.828000</td>\n",
" <td>0.828000</td>\n",
" <td>0.828000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.297000</td>\n",
" <td>0.198891</td>\n",
" <td>0.853000</td>\n",
" <td>0.853000</td>\n",
" <td>0.853000</td>\n",
" <td>0.853000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.297000</td>\n",
" <td>0.202887</td>\n",
" <td>0.851000</td>\n",
" <td>0.851000</td>\n",
" <td>0.851000</td>\n",
" <td>0.851000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.297000</td>\n",
" <td>0.228751</td>\n",
" <td>0.847000</td>\n",
" <td>0.847000</td>\n",
" <td>0.847000</td>\n",
" <td>0.847000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.153700</td>\n",
" <td>0.221675</td>\n",
" <td>0.850000</td>\n",
" <td>0.850000</td>\n",
" <td>0.850000</td>\n",
" <td>0.850000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.153700</td>\n",
" <td>0.218299</td>\n",
" <td>0.866000</td>\n",
" <td>0.866000</td>\n",
" <td>0.866000</td>\n",
" <td>0.866000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-500\n",
"Configuration saved in output/checkpoint-500/config.json\n",
"Model weights saved in output/checkpoint-500/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-1000\n",
"Configuration saved in output/checkpoint-1000/config.json\n",
"Model weights saved in output/checkpoint-1000/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from output/checkpoint-500 (score: 0.21706973016262054).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=1100, training_loss=0.212534950429743, metrics={'train_runtime': 724.5874, 'train_samples_per_second': 16.561, 'train_steps_per_second': 2.07, 'total_flos': 2315418864844800.0, 'train_loss': 0.212534950429743, 'epoch': 2.2})"
]
},
"metadata": {},
"execution_count": 110
}
],
"source": [
"trainer.train()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1FQxrdS9sGXZ",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 89
},
"outputId": "24d69727-de29-431a-85b8-f7a31c394c39"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Prediction *****\n",
" Num examples = 1000\n",
" Batch size = 8\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='125' max='125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [125/125 00:19]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
}
],
"source": [
"result = trainer.predict(val_dataset)"
]
},
{
"cell_type": "code",
"source": [
"print(result.metrics)"
],
"metadata": {
"id": "9QtUEeoVxJkt",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "077812cc-3848-4b56-ac52-a9368920819b"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'test_loss': 0.21706973016262054, 'test_accuracy': 0.83, 'test_precision': 0.83, 'test_recall': 0.83, 'test_f1': 0.83, 'test_runtime': 19.3166, 'test_samples_per_second': 51.769, 'test_steps_per_second': 6.471}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# NOTE(review): Trainer.save_model writes a DIRECTORY (config.json +\n",
"# pytorch_model.bin), not a pickle — the .pkl suffix is only a name here.\n",
"filename = 'model_encoder.pkl'\n",
"trainer.save_model(filename)"
],
"metadata": {
"id": "R3yrxs0ANvEQ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5ef8e1be-f0d5-4d76-a94f-f3eb29317a92"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model_encoder.pkl\n",
"Configuration saved in model_encoder.pkl/config.json\n",
"Model weights saved in model_encoder.pkl/pytorch_model.bin\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Z6ja8jhrmzwI"
},
"source": [
"# Model typu decoder"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "tUf06zqBAwXG",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "df99ab7a-ea9b-4e7c-b121-3cfbbafb7347"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n",
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.47)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n",
"Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.5)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n"
]
}
],
"source": [
"!pip install transformers"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"id": "wble-rL7Q0Mk",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "8eca782e-ab77-4f67-df5b-fb97b9bbe481"
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mounted at /content/drive\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "HBw75r5XBoui"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n",
"import torch\n",
"from transformers import TrainingArguments, Trainer\n",
"from transformers import EarlyStoppingCallback\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "AFgWRwlimzwJ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "5ff5cd14-b8dd-425a-f9c2-5a29f9496870"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/1ee5ff92bf5d5e992fcf9006e19b6a4ad35d7c8564ef75f4d79a1ed2153273ff.bfdcc444ff249bca1a95ca170ec350b442f81804d7df3a95a2252217574121d7\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/0bc7fa46278c9aeb0db119eeac69668e732999ecf7e70938f7fabc0c50da0ed6.f5b91da9e34259b8f4d88dbc97c740667a0e8430b96314460cdb04e86d4fc435\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/added_tokens.json from cache at /root/.cache/huggingface/transformers/3f461190d4c3e4866b53ee0eb0cc229b7868d365099be2f8e40def2f56f64bd1.b2dabb9d6f1c7ea55d3c9c1c2037f316794ad095778dd06ae6a225cc74100b76\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/d599b9f7e2161f0d2e3b9c8fd9cebef8b07c938f69b08a0a42e78c584f1b4b1e.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/da65f1f02a542899b2b4e34dbc660a4afcad000d51ea419fc5fd6a227a122f5e.3f75ee48edc5dac7e53863302122c4a3cee3a14a708eca842a8f62714c185ca5\n",
"loading file https://huggingface.co/allenai/longformer-scico/resolve/main/tokenizer.json from cache at None\n",
"loading configuration file https://huggingface.co/allenai/longformer-scico/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/7e93cc5a6217edd672fdad60f054eec31e6a815697de5adae6b921e1f75836a3.6890559d1ffa3ad48d00eee0ae70669ec630881b293df48322fe4c28c7088c35\n",
"Model config LongformerConfig {\n",
" \"_name_or_path\": \"allenai/longformer-scico\",\n",
" \"architectures\": [\n",
" \"LongformerForSequenceClassification\"\n",
" ],\n",
" \"attention_mode\": \"longformer\",\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"attention_window\": [\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512\n",
" ],\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"not related\",\n",
" \"1\": \"coref\",\n",
" \"2\": \"parent\",\n",
" \"3\": \"child\"\n",
" },\n",
" \"ignore_attention_mask\": false,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"child\": \"3\",\n",
" \"coref\": \"1\",\n",
" \"not related\": \"0\",\n",
" \"parent\": \"2\"\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 4098,\n",
" \"model_type\": \"longformer\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"sep_token_id\": 2,\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50267\n",
"}\n",
"\n",
"Adding <m> to the vocabulary\n",
"Adding </m> to the vocabulary\n",
"loading configuration file https://huggingface.co/allenai/longformer-scico/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/7e93cc5a6217edd672fdad60f054eec31e6a815697de5adae6b921e1f75836a3.6890559d1ffa3ad48d00eee0ae70669ec630881b293df48322fe4c28c7088c35\n",
"Model config LongformerConfig {\n",
" \"_name_or_path\": \"allenai/longformer-base-4096\",\n",
" \"architectures\": [\n",
" \"LongformerForSequenceClassification\"\n",
" ],\n",
" \"attention_mode\": \"longformer\",\n",
" \"attention_probs_dropout_prob\": 0.1,\n",
" \"attention_window\": [\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512\n",
" ],\n",
" \"bos_token_id\": 0,\n",
" \"classifier_dropout\": null,\n",
" \"eos_token_id\": 2,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout_prob\": 0.1,\n",
" \"hidden_size\": 768,\n",
" \"id2label\": {\n",
" \"0\": \"not related\",\n",
" \"1\": \"coref\",\n",
" \"2\": \"parent\",\n",
" \"3\": \"child\"\n",
" },\n",
" \"ignore_attention_mask\": false,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 3072,\n",
" \"label2id\": {\n",
" \"child\": \"3\",\n",
" \"coref\": \"1\",\n",
" \"not related\": \"0\",\n",
" \"parent\": \"2\"\n",
" },\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"max_position_embeddings\": 4098,\n",
" \"model_type\": \"longformer\",\n",
" \"num_attention_heads\": 12,\n",
" \"num_hidden_layers\": 12,\n",
" \"pad_token_id\": 1,\n",
" \"position_embedding_type\": \"absolute\",\n",
" \"problem_type\": \"multi_label_classification\",\n",
" \"sep_token_id\": 2,\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"type_vocab_size\": 1,\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50267\n",
"}\n",
"\n",
"loading weights file https://huggingface.co/allenai/longformer-scico/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/33709a0b0a44102dd29583428fe5253bf07cbd1ed163757382d471017620ad4d.6fd6d3de002d054747c1a5eb1e2b33e56924ad6db478547c9cf616d11dd48609\n",
"All model checkpoint weights were used when initializing LongformerForSequenceClassification.\n",
"\n",
"All the weights of LongformerForSequenceClassification were initialized from the model checkpoint at allenai/longformer-scico.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use LongformerForSequenceClassification for predictions without further training.\n",
"/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
"\n",
"\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
"Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 31753: field larger than field limit (131072)\n",
"Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 275624: field larger than field limit (131072)\n",
"Skipping line 302668: field larger than field limit (131072)\n",
"Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 384761: field larger than field limit (131072)\n",
"Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 401260: field larger than field limit (131072)\n",
"Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 465419: field larger than field limit (131072)\n",
"Skipping line 466152: field larger than field limit (131072)\n",
"Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 529874: field larger than field limit (131072)\n",
"Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 589855: field larger than field limit (131072)\n",
"Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 632882: field larger than field limit (131072)\n",
"Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 658667: field larger than field limit (131072)\n"
]
}
],
"source": [
"from transformers import LongformerTokenizer, LongformerForSequenceClassification, LongformerConfig\n",
"\n",
"model_name = \"allenai/longformer-scico\"\n",
"tokenizer = LongformerTokenizer.from_pretrained(model_name)\n",
"# BUG FIX: the original code built LongformerConfig(attention_window=32) and then\n",
"# called .from_pretrained() on a model *instance*, which silently discarded that\n",
"# config and reloaded the checkpoint with its default config -- the training logs\n",
"# confirm attention_window=512 was actually used. from_pretrained is a\n",
"# classmethod; call it on the class directly so the code matches the behavior.\n",
"model = LongformerForSequenceClassification.from_pretrained(model_name, problem_type=\"multi_label_classification\")\n",
"\n",
"# Age buckets mapped to one-hot 4-class vectors:\n",
"#    1-22 -> class 1\n",
"#   23-31 -> class 2\n",
"#   32-39 -> class 3\n",
"#   40-48 -> class 4\n",
"\n",
"def mapAgeToClass2(value: pd.Series) -> list:\n",
"    \"\"\"Map a row's 'age' field to a one-hot 4-class label vector.\"\"\"\n",
"    age = value['age']\n",
"    if age <= 22:\n",
"        return [1.0, 0.0, 0.0, 0.0]\n",
"    elif age <= 31:\n",
"        return [0.0, 1.0, 0.0, 0.0]\n",
"    elif age <= 39:\n",
"        return [0.0, 0.0, 1.0, 0.0]\n",
"    else:\n",
"        return [0.0, 0.0, 0.0, 1.0]\n",
"\n",
"data_path = 'drive/MyDrive/blogtext.csv'\n",
"\n",
"# The dump contains NULL bytes and oversized fields (see the skipped-line log\n",
"# above); error_bad_lines=False drops those rows instead of aborting the read.\n",
"data = pd.read_csv(data_path, error_bad_lines=False, engine='python')\n",
"data = data[:data_amount]\n",
"data['label'] = data.apply(mapAgeToClass2, axis=1)\n",
"\n",
"X = list(data['text'])\n",
"Y = list(data['label'])\n",
"# FIX: `device` used to be undefined on CPU-only machines; default it first.\n",
"device = \"cpu\"\n",
"if torch.cuda.is_available():\n",
"    device = \"cuda:0\"\n",
"    torch.cuda.empty_cache()\n",
"\n",
"# random_state pinned so the split (and the reported metrics) are reproducible.\n",
"X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=0)\n",
"\n",
"X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=128)\n",
"X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=128)\n",
"\n",
"class Dataset(torch.utils.data.Dataset):\n",
"    \"\"\"Wraps tokenizer encodings (and optional labels) as a torch Dataset.\"\"\"\n",
"\n",
"    def __init__(self, encodings, labels=None):\n",
"        self.encodings = encodings\n",
"        self.labels = labels\n",
"\n",
"    def __getitem__(self, idx):\n",
"        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n",
"        # `is not None` rather than truthiness, so an empty label list is honoured.\n",
"        if self.labels is not None:\n",
"            item[\"labels\"] = torch.tensor(self.labels[idx])\n",
"        return item\n",
"\n",
"    def __len__(self):\n",
"        return len(self.encodings[\"input_ids\"])\n",
"\n",
"train_dataset = Dataset(X_train_tokenized, y_train)\n",
"val_dataset = Dataset(X_val_tokenized, y_val)\n",
"\n",
"def compute_metrics(p):\n",
"    \"\"\"Trainer metrics hook: argmax one-hot labels/logits, report micro-averaged P/R/F1.\"\"\"\n",
"    pred, labels = p\n",
"    pred = np.argmax(pred, axis=1)\n",
"    labels = np.argmax(labels, axis=1)\n",
"\n",
"    accuracy = accuracy_score(y_true=labels, y_pred=pred)\n",
"    recall = recall_score(y_true=labels, y_pred=pred, average='micro')\n",
"    precision = precision_score(y_true=labels, y_pred=pred, average='micro')\n",
"    f1 = f1_score(y_true=labels, y_pred=pred, average='micro')\n",
"\n",
"    return {\"accuracy\": accuracy, \"precision\": precision, \"recall\": recall, \"f1\": f1}\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "b-3I70XlmzwK",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "89edfdfb-2462-4bce-ab20-9d9572388be3"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
],
"source": [
"args = TrainingArguments(\n",
"    output_dir=\"output\",\n",
"    evaluation_strategy=\"steps\",\n",
"    eval_steps=100,\n",
"    # BUG FIX: with the default save_steps=500, load_best_model_at_end could only\n",
"    # choose among checkpoints written every 500 steps, so 4 out of 5 evaluated\n",
"    # models were never candidates for \"best\". Checkpoint at the same cadence\n",
"    # as evaluation so every evaluated model is eligible.\n",
"    save_strategy=\"steps\",\n",
"    save_steps=100,\n",
"    per_device_train_batch_size=8,\n",
"    per_device_eval_batch_size=8,\n",
"    num_train_epochs=3,\n",
"    seed=0,\n",
"    load_best_model_at_end=True\n",
")\n",
"trainer = Trainer(\n",
"    model=model,\n",
"    args=args,\n",
"    train_dataset=train_dataset,\n",
"    eval_dataset=val_dataset,\n",
"    compute_metrics=compute_metrics,\n",
"    # stop if eval loss fails to improve for 3 consecutive evaluations\n",
"    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "4lvaWP9RmzwK",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "a170114b-47ff-4080-ab78-164a0dcc3c15"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 4000\n",
" Num Epochs = 3\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 1500\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1500' max='1500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1500/1500 35:53, Epoch 3/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Accuracy</th>\n",
" <th>Precision</th>\n",
" <th>Recall</th>\n",
" <th>F1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.407021</td>\n",
" <td>0.625000</td>\n",
" <td>0.625000</td>\n",
" <td>0.625000</td>\n",
" <td>0.625000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.333797</td>\n",
" <td>0.690000</td>\n",
" <td>0.690000</td>\n",
" <td>0.690000</td>\n",
" <td>0.690000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.403388</td>\n",
" <td>0.644000</td>\n",
" <td>0.644000</td>\n",
" <td>0.644000</td>\n",
" <td>0.644000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.296055</td>\n",
" <td>0.747000</td>\n",
" <td>0.747000</td>\n",
" <td>0.747000</td>\n",
" <td>0.747000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>0.370100</td>\n",
" <td>0.318152</td>\n",
" <td>0.713000</td>\n",
" <td>0.713000</td>\n",
" <td>0.713000</td>\n",
" <td>0.713000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>0.370100</td>\n",
" <td>0.301799</td>\n",
" <td>0.740000</td>\n",
" <td>0.740000</td>\n",
" <td>0.740000</td>\n",
" <td>0.740000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>0.370100</td>\n",
" <td>0.295635</td>\n",
" <td>0.715000</td>\n",
" <td>0.715000</td>\n",
" <td>0.715000</td>\n",
" <td>0.715000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>0.370100</td>\n",
" <td>0.268345</td>\n",
" <td>0.765000</td>\n",
" <td>0.765000</td>\n",
" <td>0.765000</td>\n",
" <td>0.765000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>0.370100</td>\n",
" <td>0.282199</td>\n",
" <td>0.753000</td>\n",
" <td>0.753000</td>\n",
" <td>0.753000</td>\n",
" <td>0.753000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.294600</td>\n",
" <td>0.265310</td>\n",
" <td>0.788000</td>\n",
" <td>0.788000</td>\n",
" <td>0.788000</td>\n",
" <td>0.788000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.294600</td>\n",
" <td>0.268466</td>\n",
" <td>0.789000</td>\n",
" <td>0.789000</td>\n",
" <td>0.789000</td>\n",
" <td>0.789000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1200</td>\n",
" <td>0.294600</td>\n",
" <td>0.245028</td>\n",
" <td>0.804000</td>\n",
" <td>0.804000</td>\n",
" <td>0.804000</td>\n",
" <td>0.804000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1300</td>\n",
" <td>0.294600</td>\n",
" <td>0.260589</td>\n",
" <td>0.808000</td>\n",
" <td>0.808000</td>\n",
" <td>0.808000</td>\n",
" <td>0.808000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1400</td>\n",
" <td>0.294600</td>\n",
" <td>0.247587</td>\n",
" <td>0.807000</td>\n",
" <td>0.807000</td>\n",
" <td>0.807000</td>\n",
" <td>0.807000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1500</td>\n",
" <td>0.213700</td>\n",
" <td>0.242638</td>\n",
" <td>0.824000</td>\n",
" <td>0.824000</td>\n",
" <td>0.824000</td>\n",
" <td>0.824000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"\u001b[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.\u001b[0m\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Saving model checkpoint to output/checkpoint-500\n",
"Configuration saved in output/checkpoint-500/config.json\n",
"Model weights saved in output/checkpoint-500/pytorch_model.bin\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Saving model checkpoint to output/checkpoint-1000\n",
"Configuration saved in output/checkpoint-1000/config.json\n",
"Model weights saved in output/checkpoint-1000/pytorch_model.bin\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Saving model checkpoint to output/checkpoint-1500\n",
"Configuration saved in output/checkpoint-1500/config.json\n",
"Model weights saved in output/checkpoint-1500/pytorch_model.bin\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from output/checkpoint-1500 (score: 0.24263811111450195).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=1500, training_loss=0.29278671264648437, metrics={'train_runtime': 2154.3722, 'train_samples_per_second': 5.57, 'train_steps_per_second': 0.696, 'total_flos': 985291591680000.0, 'train_loss': 0.29278671264648437, 'epoch': 3.0})"
]
},
"metadata": {},
"execution_count": 8
}
],
"source": [
"trainer.train()"
]
},
{
"cell_type": "code",
"source": [
"result = trainer.predict(val_dataset)"
],
"metadata": {
"id": "YTeHJ_c6I2iy",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "f2e1ab2b-807c-473c-f73a-0e0815b1b4a8"
},
"execution_count": 9,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Prediction *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='125' max='125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [125/125 00:37]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n",
"Initializing global attention on CLS token...\n",
"Input ids are automatically padded from 128 to 512 to be a multiple of `config.attention_window`: 512\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(result.metrics)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "RWdJsGGYz9-p",
"outputId": "ab22383a-3ac4-4832-800d-f1a5415b173c"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'test_loss': 0.24263811111450195, 'test_accuracy': 0.824, 'test_precision': 0.824, 'test_recall': 0.824, 'test_f1': 0.824, 'test_runtime': 38.0024, 'test_samples_per_second': 26.314, 'test_steps_per_second': 3.289}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"filename='model_decoder'\n",
"trainer.save_model(filename)"
],
"metadata": {
"id": "SSAnGmAXZGsT",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e443c6a4-cf9b-4448-8e6d-eb816d4435ee"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model_decoder\n",
"Configuration saved in model_decoder/config.json\n",
"Model weights saved in model_decoder/pytorch_model.bin\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "ESiFpWJYzcgC"
},
"source": [
"# Model typu encoder-decoder"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "WYz-wVPoz_tJ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e25586d5-4b1c-4588-efde-1da9f0a18cfd"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Requirement already satisfied: sentencepiece==0.1.91 in /usr/local/lib/python3.7/dist-packages (0.1.91)\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n",
"Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.5)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n",
"Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.47)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n",
"Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
"Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n",
"Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
"Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n"
]
}
],
"source": [
"!pip install sentencepiece==0.1.91\n",
"!pip install transformers"
]
},
{
"cell_type": "code",
"source": [
"from google.colab import drive\n",
"drive.mount('/content/drive')"
],
"metadata": {
"id": "phhvsUnGYC-o"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "Ylv54tLmBX6a"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n",
"import torch\n",
"from transformers import TrainingArguments, Trainer\n",
"from transformers import EarlyStoppingCallback\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "_-KcWxdqzgF2",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "44fe6f02-c75b-47d1-f856-866f2fd8c51f"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"loading file https://huggingface.co/t5-small/resolve/main/spiece.model from cache at /root/.cache/huggingface/transformers/65fc04e21f45f61430aea0c4fedffac16a4d20d78b8e6601d8d996ebefefecd2.3b69006860e7b5d0a63ffdddc01ddcd6b7c318a6f4fd793596552c741734c62d\n",
"loading file https://huggingface.co/t5-small/resolve/main/added_tokens.json from cache at None\n",
"loading file https://huggingface.co/t5-small/resolve/main/special_tokens_map.json from cache at None\n",
"loading file https://huggingface.co/t5-small/resolve/main/tokenizer_config.json from cache at None\n",
"loading file https://huggingface.co/t5-small/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/06779097c78e12f47ef67ecb728810c2ae757ee0a9efe9390c6419783d99382d.8627f1bd5d270a9fd2e5a51c8bec3223896587cc3cfe13edeabb0992ab43c529\n",
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
"Model config T5Config {\n",
" \"_name_or_path\": \"t5-small\",\n",
" \"architectures\": [\n",
" \"T5WithLMHeadModel\"\n",
" ],\n",
" \"d_ff\": 2048,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"relu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"n_positions\": 512,\n",
" \"num_decoder_layers\": 6,\n",
" \"num_heads\": 8,\n",
" \"num_layers\": 6,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"task_specific_params\": {\n",
" \"summarization\": {\n",
" \"early_stopping\": true,\n",
" \"length_penalty\": 2.0,\n",
" \"max_length\": 200,\n",
" \"min_length\": 30,\n",
" \"no_repeat_ngram_size\": 3,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"summarize: \"\n",
" },\n",
" \"translation_en_to_de\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to German: \"\n",
" },\n",
" \"translation_en_to_fr\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to French: \"\n",
" },\n",
" \"translation_en_to_ro\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to Romanian: \"\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
"Model config T5Config {\n",
" \"architectures\": [\n",
" \"T5WithLMHeadModel\"\n",
" ],\n",
" \"d_ff\": 2048,\n",
" \"d_kv\": 64,\n",
" \"d_model\": 512,\n",
" \"decoder_start_token_id\": 0,\n",
" \"dropout_rate\": 0.1,\n",
" \"eos_token_id\": 1,\n",
" \"feed_forward_proj\": \"relu\",\n",
" \"initializer_factor\": 1.0,\n",
" \"is_encoder_decoder\": true,\n",
" \"layer_norm_epsilon\": 1e-06,\n",
" \"model_type\": \"t5\",\n",
" \"n_positions\": 512,\n",
" \"num_decoder_layers\": 6,\n",
" \"num_heads\": 8,\n",
" \"num_layers\": 6,\n",
" \"output_past\": true,\n",
" \"pad_token_id\": 0,\n",
" \"relative_attention_num_buckets\": 32,\n",
" \"task_specific_params\": {\n",
" \"summarization\": {\n",
" \"early_stopping\": true,\n",
" \"length_penalty\": 2.0,\n",
" \"max_length\": 200,\n",
" \"min_length\": 30,\n",
" \"no_repeat_ngram_size\": 3,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"summarize: \"\n",
" },\n",
" \"translation_en_to_de\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to German: \"\n",
" },\n",
" \"translation_en_to_fr\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to French: \"\n",
" },\n",
" \"translation_en_to_ro\": {\n",
" \"early_stopping\": true,\n",
" \"max_length\": 300,\n",
" \"num_beams\": 4,\n",
" \"prefix\": \"translate English to Romanian: \"\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.16.2\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 32128\n",
"}\n",
"\n",
"loading weights file https://huggingface.co/t5-small/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/fee5a3a0ae379232608b6eed45d2d7a0d2966b9683728838412caccc41b4b0ed.ddacdc89ec88482db20c676f0861a336f3d0409f94748c209847b49529d73885\n",
"All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
"\n",
"All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at t5-small.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
"/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
"\n",
"\n",
" exec(code_obj, self.user_global_ns, self.user_ns)\n",
"Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 31753: field larger than field limit (131072)\n",
"Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 275624: field larger than field limit (131072)\n",
"Skipping line 302668: field larger than field limit (131072)\n",
"Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 384761: field larger than field limit (131072)\n",
"Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 401260: field larger than field limit (131072)\n",
"Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 465419: field larger than field limit (131072)\n",
"Skipping line 466152: field larger than field limit (131072)\n",
"Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 529874: field larger than field limit (131072)\n",
"Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 589855: field larger than field limit (131072)\n",
"Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 632882: field larger than field limit (131072)\n",
"Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
"Skipping line 658667: field larger than field limit (131072)\n"
]
}
],
"source": [
"from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
"from transformers import EvalPrediction\n",
"\n",
"model_name = \"t5-small\"\n",
"tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
"model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
"\n",
"\"\"\"\n",
"1 - 22 -> 1 klasa\n",
"23 - 31 -> 2 klasa\n",
"32 - 39 -> 3 klasa \n",
"40 - 48 -> 4 klasa\n",
"\"\"\"\n",
"\n",
"def mapAgeToClass2(value: pd.DataFrame):\n",
" if(value['age'] <=22):\n",
" return 'class1'\n",
" elif(value['age'] > 22 and value['age'] <= 31):\n",
" return 'class2'\n",
" elif(value['age'] > 31 and value['age'] <= 39):\n",
" return 'class3'\n",
" else:\n",
" return 'class4'\n",
"\n",
"data_path = 'drive/MyDrive/blogtext.csv'\n",
"\n",
"data = pd.read_csv(data_path, error_bad_lines=False, engine='python')\n",
"data = data[:data_amount]\n",
"data['label'] = data.apply(lambda row: mapAgeToClass2(row), axis=1)\n",
"\n",
"\n",
"X = list(data['text'])\n",
"Y = list(data['label'])\n",
"if (torch.cuda.is_available()):\n",
" device = \"cuda:0\"\n",
" torch.cuda.empty_cache()\n",
"\n",
"\n",
"X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2)\n",
"\n",
"X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=1024)\n",
"X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=1024)\n",
"\n",
"class Dataset(torch.utils.data.Dataset):\n",
" def __init__(self, encodings, labels=None):\n",
" self.encodings = encodings\n",
" self.labels = labels\n",
"\n",
" def __getitem__(self, idx):\n",
" item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n",
" if self.labels:\n",
" item[\"labels\"] = torch.tensor(tokenizer(self.labels[idx])['input_ids'])\n",
" return item\n",
"\n",
" def __len__(self):\n",
" return len(self.encodings[\"input_ids\"])\n",
"\n",
"train_dataset = Dataset(X_train_tokenized, y_train)\n",
"val_dataset = Dataset(X_val_tokenized, y_val)\n",
"\n",
"def compute_metrics(pred):\n",
" labels_ids = pred.label_ids\n",
" pred_ids = pred.predictions\n",
"\n",
" pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)\n",
" label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)\n",
"\n",
" accuracy = sum([int(pred_str[i] == label_str[i]) for i in range(len(pred_str))]) / len(pred_str)\n",
"\n",
" return {\"accuracy\": accuracy}\n",
"\n"
]
},
{
"cell_type": "code",
"source": [
"from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
"\n",
"args = Seq2SeqTrainingArguments(\n",
" output_dir=\"output\",\n",
" evaluation_strategy=\"steps\",\n",
" eval_steps=50,\n",
" per_device_train_batch_size=8,\n",
" per_device_eval_batch_size=8,\n",
" num_train_epochs=3,\n",
" seed=0,\n",
" load_best_model_at_end=True,\n",
" predict_with_generate=True\n",
")\n",
"\n",
"trainer = Seq2SeqTrainer(\n",
" model=model,\n",
" args=args,\n",
" train_dataset=train_dataset,\n",
" eval_dataset=val_dataset,\n",
" compute_metrics=compute_metrics\n",
")"
],
"metadata": {
"id": "XayaHmAMgI1x",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "4c32a772-96bc-4a43-b406-110c5f311932"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"PyTorch: setting up devices\n",
"The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
]
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8nVY24TCz1Mi",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"outputId": "b4542048-d208-463a-b088-df9645f8b92d"
},
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" FutureWarning,\n",
"***** Running training *****\n",
" Num examples = 4000\n",
" Num Epochs = 3\n",
" Instantaneous batch size per device = 8\n",
" Total train batch size (w. parallel, distributed & accumulation) = 8\n",
" Gradient Accumulation steps = 1\n",
" Total optimization steps = 1500\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='1500' max='1500' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [1500/1500 32:13, Epoch 3/3]\n",
" </div>\n",
" <table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: left;\">\n",
" <th>Step</th>\n",
" <th>Training Loss</th>\n",
" <th>Validation Loss</th>\n",
" <th>Accuracy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>50</td>\n",
" <td>No log</td>\n",
" <td>2.898511</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>No log</td>\n",
" <td>0.437433</td>\n",
" <td>0.601000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>150</td>\n",
" <td>No log</td>\n",
" <td>0.301548</td>\n",
" <td>0.645000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>200</td>\n",
" <td>No log</td>\n",
" <td>0.278892</td>\n",
" <td>0.668000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>250</td>\n",
" <td>No log</td>\n",
" <td>0.270245</td>\n",
" <td>0.686000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>300</td>\n",
" <td>No log</td>\n",
" <td>0.286085</td>\n",
" <td>0.663000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>350</td>\n",
" <td>No log</td>\n",
" <td>0.262066</td>\n",
" <td>0.708000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>400</td>\n",
" <td>No log</td>\n",
" <td>0.257251</td>\n",
" <td>0.697000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>450</td>\n",
" <td>No log</td>\n",
" <td>0.252848</td>\n",
" <td>0.710000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>500</td>\n",
" <td>1.057600</td>\n",
" <td>0.248504</td>\n",
" <td>0.701000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>550</td>\n",
" <td>1.057600</td>\n",
" <td>0.251563</td>\n",
" <td>0.721000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>600</td>\n",
" <td>1.057600</td>\n",
" <td>0.239508</td>\n",
" <td>0.731000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>650</td>\n",
" <td>1.057600</td>\n",
" <td>0.235462</td>\n",
" <td>0.738000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>700</td>\n",
" <td>1.057600</td>\n",
" <td>0.246152</td>\n",
" <td>0.734000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>750</td>\n",
" <td>1.057600</td>\n",
" <td>0.237433</td>\n",
" <td>0.733000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>800</td>\n",
" <td>1.057600</td>\n",
" <td>0.234127</td>\n",
" <td>0.752000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>850</td>\n",
" <td>1.057600</td>\n",
" <td>0.224785</td>\n",
" <td>0.760000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>900</td>\n",
" <td>1.057600</td>\n",
" <td>0.222618</td>\n",
" <td>0.747000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>950</td>\n",
" <td>1.057600</td>\n",
" <td>0.217110</td>\n",
" <td>0.770000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1000</td>\n",
" <td>0.266600</td>\n",
" <td>0.214305</td>\n",
" <td>0.765000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1050</td>\n",
" <td>0.266600</td>\n",
" <td>0.213813</td>\n",
" <td>0.771000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1100</td>\n",
" <td>0.266600</td>\n",
" <td>0.212208</td>\n",
" <td>0.774000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1150</td>\n",
" <td>0.266600</td>\n",
" <td>0.211007</td>\n",
" <td>0.772000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1200</td>\n",
" <td>0.266600</td>\n",
" <td>0.210451</td>\n",
" <td>0.768000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1250</td>\n",
" <td>0.266600</td>\n",
" <td>0.210460</td>\n",
" <td>0.768000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1300</td>\n",
" <td>0.266600</td>\n",
" <td>0.214561</td>\n",
" <td>0.769000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1350</td>\n",
" <td>0.266600</td>\n",
" <td>0.210450</td>\n",
" <td>0.767000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1400</td>\n",
" <td>0.266600</td>\n",
" <td>0.209276</td>\n",
" <td>0.767000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1450</td>\n",
" <td>0.266600</td>\n",
" <td>0.210069</td>\n",
" <td>0.769000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1500</td>\n",
" <td>0.244700</td>\n",
" <td>0.210056</td>\n",
" <td>0.766000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-500\n",
"Configuration saved in output/checkpoint-500/config.json\n",
"Model weights saved in output/checkpoint-500/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-1000\n",
"Configuration saved in output/checkpoint-1000/config.json\n",
"Model weights saved in output/checkpoint-1000/pytorch_model.bin\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"***** Running Evaluation *****\n",
" Num examples = 1000\n",
" Batch size = 8\n",
"Saving model checkpoint to output/checkpoint-1500\n",
"Configuration saved in output/checkpoint-1500/config.json\n",
"Model weights saved in output/checkpoint-1500/pytorch_model.bin\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"Loading best model from output/checkpoint-1500 (score: 0.2100560963153839).\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"TrainOutput(global_step=1500, training_loss=0.5229549509684245, metrics={'train_runtime': 1934.1295, 'train_samples_per_second': 6.204, 'train_steps_per_second': 0.776, 'total_flos': 3248203235328000.0, 'train_loss': 0.5229549509684245, 'epoch': 3.0})"
]
},
"metadata": {},
"execution_count": 20
}
],
"source": [
"# Fine-tune the model; trainer is a Hugging Face transformers Trainer\n",
"# configured in an earlier cell. Returns a TrainOutput with the final\n",
"# training loss and runtime metrics (shown in the cell output below).\n",
"trainer.train()"
]
},
{
"cell_type": "code",
"source": [
"# Run inference over the validation split with the fine-tuned model.\n",
"# result exposes .metrics (test_loss, test_accuracy, timing), printed\n",
"# in the next cell. val_dataset is defined in an earlier cell.\n",
"result = trainer.predict(val_dataset)"
],
"metadata": {
"id": "yBrHzXzhaKvk",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 89
},
"outputId": "92a6c3ea-4695-4dcf-fe71-9621fadc9906"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"***** Running Prediction *****\n",
" Num examples = 1000\n",
" Batch size = 8\n"
]
},
{
"output_type": "display_data",
"data": {
"text/html": [
"\n",
" <div>\n",
" \n",
" <progress value='125' max='125' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
" [125/125 00:33]\n",
" </div>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {}
}
]
},
{
"cell_type": "code",
"source": [
"# Display the aggregate prediction metrics: test_loss, test_accuracy,\n",
"# and runtime/throughput statistics from the predict() call above.\n",
"print(result.metrics)"
],
"metadata": {
"id": "nzm2vx86llKw",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "6be67f3e-043c-423c-c81a-8686d59a656e"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{'test_loss': 0.2100560963153839, 'test_accuracy': 0.766, 'test_runtime': 45.1374, 'test_samples_per_second': 22.155, 'test_steps_per_second': 2.769}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Persist the fine-tuned model (config.json + pytorch_model.bin) to the\n",
"# local 'model_encoder_decoder' directory, as shown in the log below.\n",
"filename='model_encoder_decoder'\n",
"trainer.save_model(filename)"
],
"metadata": {
"id": "LWpjAH_YaL66",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "e5c07d60-c441-4dc1-8ce0-66e815823a68"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"Saving model checkpoint to model_encoder_decoder\n",
"Configuration saved in model_encoder_decoder/config.json\n",
"Model weights saved in model_encoder_decoder/pytorch_model.bin\n"
]
}
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"name": "main.ipynb",
"provenance": [],
"machine_shape": "hm"
},
"interpreter": {
"hash": "f4394274b6de412f99b9d08dfb473204abc12afd5637ebb20c9ad8dbd67e97a0"
},
"kernelspec": {
"display_name": "Python 3.10.1 64-bit ('venv': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 0
}