diff --git a/init.sh b/init.sh old mode 100644 new mode 100755 index b57bfd4..3f2c6cb --- a/init.sh +++ b/init.sh @@ -4,5 +4,8 @@ pip3 install gdown pip3 install sklearn pip3 install matplotlib gdown --fuzzy https://drive.google.com/file/d/153UDq2d-iVcLfc-7DeGm4ZfczjTQPxpF/view?usp=sharing +# TODO trzeba chyba ustawić pobieranie directory +gdown --fuzzy https://drive.google.com/drive/folders/1-7W9l6HkP_6KtWk_-TWH76QWTPgih2Lb?usp=sharing +gdown --fuzzy https://drive.google.com/drive/folders/1Y60LOzUQJRHIrq-DRm45cNERPdM8GPMu?usp=sharing mkdir data mv blogtext.csv data \ No newline at end of file diff --git a/main.ipynb b/main.ipynb index c739032..477104c 100644 --- a/main.ipynb +++ b/main.ipynb @@ -1,538 +1,8679 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n", - "import torch\n", - "from transformers import TrainingArguments, Trainer\n", - "from transformers import BertTokenizer, BertForSequenceClassification\n", - "from transformers import EarlyStoppingCallback\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgenderagetopicsigndatetext
02059027male15StudentLeo14,May,2004Info has been found (+/- 100 pages,...
12059027male15StudentLeo13,May,2004These are the team members: Drewe...
22059027male15StudentLeo12,May,2004In het kader van kernfusie op aarde...
32059027male15StudentLeo12,May,2004testing!!! testing!!!
43581210male33InvestmentBankingAquarius11,June,2004Thanks to Yahoo!'s Toolbar I can ...
\n", - "
" + "cell_type": "code", + "source": [ + "data_amount = 5000" ], - "text/plain": [ - " id gender age topic sign date \\\n", - "0 2059027 male 15 Student Leo 14,May,2004 \n", - "1 2059027 male 15 Student Leo 13,May,2004 \n", - "2 2059027 male 15 Student Leo 12,May,2004 \n", - "3 2059027 male 15 Student Leo 12,May,2004 \n", - "4 3581210 male 33 InvestmentBanking Aquarius 11,June,2004 \n", - "\n", - " text \n", - "0 Info has been found (+/- 100 pages,... \n", - "1 These are the team members: Drewe... \n", - "2 In het kader van kernfusie op aarde... \n", - "3 testing!!! testing!!! \n", - "4 Thanks to Yahoo!'s Toolbar I can ... " - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data = pd.read_csv(\"data/blogtext.csv\")\n", - "data = data[:100]\n", - "data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Model typu encoder (BertForSequenceClassification)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']\n", - "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", - "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", - "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" - ] - } - ], - "source": [ - "model_name = 'bert-base-uncased'\n", - "tokenizer = BertTokenizer.from_pretrained(model_name)\n", - "model = BertForSequenceClassification.from_pretrained(model_name, problem_type=\"multi_label_classification\", num_labels=4)" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAbqElEQVR4nO3df5RcZZ3n8feHhAShFQhgL0KUCMnOCeMsY5oEZpXpFgcadiTObJhN88OwA5tVJ3N29Lizcd2JJLp7Dq4L4x5wNLOwMIDpZOPqZLUdQEkfRpcfIcqvBhKaiJKIsNBELBFj4Lt/3CdOTVnVVd1V1dU8+bzOqdP3x/Pc+62bW5+6/VT1jSICMzPL1yGdLsDMzNrLQW9mljkHvZlZ5hz0ZmaZc9CbmWXOQW9mljkHvXWcpBFJvZ2uo5Mk/YGkpyWVJP12p+uxvDjora0kPSXpvRXLLpP07QPzEXFqRAzX2c5JkkLSzDaV2mmfBVZFRFdEfK9aAxV2SXp0imuz1zkHvRkwDd5A3gaM1GlzFvBm4O2STm9/SZYLB711XPlVv6TFku6X9JKkZyVdnZrdlX7uTcMbZ0o6RNJ/kvQDSc9J+htJR5Zt9wNp3QuS/qJiP1dK2izpFkkvAZelfd8taa+kZyRdK2lW2fZC0oclPSHpp5I+JelkSf831bupvH3Fc6xaq6TZkkrADOBBSU+Oc6hWAH8LDKXp8u3Pk3RXquubkq6TdEvZ+jNSnXslPXiwD5UdbBz0Nt18DvhcRLwJOBnYlJaflX4elYY37gYuS48+4O1AF3AtgKSFwOeBi4HjgSOBEyr2tRTYDBwF3Aq8CnwEOBY4Ezgb+HBFn3OBRcAZwJ8D64FLgLnAbwIDNZ5X1Voj4hcR0ZXa/LOIOLlaZ0mHA8tSnbcCyyveVL4E3AccA1wJXFrW9wTg68CngTnAx4AvSzquRq2WGQe9TYWvpivJvZL2UgRwLb8ETpF0bESUIuKecdpeDFwdEbsiogR8nCIAZ1KE4v+JiG9HxD5gDVB5Y6e7I+KrEfFaRPw8IrZHxD0RsT8ingK+CPxuRZ/PRMRLETECPALcnvb/E+AbQK0PUsertRF/CPwCuJ0itA8F/gWApLcCpwNrImJfRHwb2FLW9xJgKCKG0nO9A7gfOL/BfdvrnIPepsL7I+KoAw9+/Sq53OXAAuBxSdsk/f44bd8C/KBs/gfATKA7rXv6wIqIeBl4oaL/0+UzkhZI+pqkH6fhnP9CcXVf7tmy6Z9Xme+iuvFqbcQKYFN6E3oF+DL/MHzzFmAsPccDyp/b24ALK95s30Xxm44dBDr9AZTZPxIRTwADkg6huIrdLOkYfv1qHOBHFCF2wFuB/RTh+wzwTw+skPQGimGNf7S7ivm/Ar4HDETETyX9GcVvBq0wXq3jknQi8B5gsaR/mRYfDhwm6ViK5zpH0uFlYT+3bBNPAzdHxL9p8jnY65Sv6G1akXSJpOMi4jVgb1r8GvD/0s+3lzXfAHwkfRDZRXEFvjEi9lOMvb9P0u+ksewrAdXZ/RuBl4CSpN8APtSip1Wv1nouBXZSvHGdlh4LgN0Ub0o/oBiKuVLSLElnAu8r638LxbE4V9IMSYdJ6k1vIHYQcNDbdNMPjKRvonwOWJ7Gz18G/jPwnTT8cAZwA3AzxTdyvg+8AvwpQBpD/1NgkOKKtwQ8RzHOXcvHgIuAnwJ/DWxs4fOqWWsDVgCfj4gflz+AL/APwzcXU3yA/ALFh64bSc81Ip6m+OD5P1K8YT4N/Hv8+j9oyP/xiB0M0lX0XmB+RHy/w+W0naSNwOMR8clO12Kd53d0y5ak90k6XNIRFH95+jDwVGerag9Jp6fv9B8iqZ/iCv6rHS7LpgkHveVsKcWHoD8C5lMMA+X6K+w/AYYphqj+O/ChWrdSsIOPh27MzDLnK3ozs8xNu+/RH3vssXHSSSd1bP8/+9nPOOKIIzq2/3pcX3NcX3NcX3PaWd/27dufj4jqt7WIiGn1WLRoUXTS1q1bO7r/elxfc1xfc1xfc9pZH3B/1MhVD92YmWXOQW9mljkHvZlZ5hz0ZmaZc9CbmWXOQW9mljkHvZlZ5hz0ZmaZc9CbmWVu2t0Cwczy0dc3tfsbGIC1a6d2nxNRr76tW9uzX1/Rm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpa5hoJeUr+kHZJGJa2usv4sSd+VtF/Ssirr3yRpt6RrW1G0mZk1rm7QS5oBXAecBywEBiQtrGj2Q+Ay4Es1NvMp4K7Jl2lmZpPVyBX9YmA0InZFxD5gEFha3iAinoqIh4DXKjtLWgR0A7e3oF4zM5sgFf/V4DgNiqGY/oi4Is1fCiyJiFVV2t4IfC0iNqf5Q4A7gUuA9wI9NfqtBFYCdHd3LxocHGzmOTWlVCrR1dXVsf3X4/qa4/qaM9H6du5sYzFVzJlTYmxs+h6/evUtWDD5bff19W2PiJ5q69p9C4QPA0MRsVtSzUYRsR5YD9DT0xO9vb1tLqu24eFhOrn/elxfc1xfcyZa31TfjmBgYJgNG3qndqcTUK++dt0CoZGg3wPMLZs/MS1rxJnAuyV9GOgCZkkqRcSvfaBrZmbt0UjQbwPmS5pHEfDLgYsa2XhEXHxgWtJlFEM3DnkzsylU98PYiNgPrAJuAx4DNkXEiKR1ki4AkHS6pN3AhcAXJY20s2gzM2tcQ2P0ETEEDFUsW1M2vY1iSGe8bdwI3DjhCs3MrCn+y1gzs8w56M3MMuegNzPLnIPezCxzDnozs8w56M3MMuegNzPLnIPezCxzDnozs8w56M3MMuegNzPLnIPezCxzDnozs8w56M3MMuegNzPLnIPezCxzDnozs8w56M3MMuegNzPLXENBL6lf0g5Jo5JWV1l/lqTvStovaVnZ8tMk3S1pRNJDkv5VK4s3M7P66ga9pBnAdcB5wEJgQNLCimY/BC4DvlSx/GXgAxFxKtAP/KWko5qs2czMJmBmA20WA6MRsQtA0iCwFHj0QIOIeCqte628Y0TsLJv+kaTngOOAvc0WbmZmjVFEjN+gGIrpj4gr0vylwJKIWFWl7Y3A1yJic5V1i4GbgFMj4rWKdSuBlQDd3d2LBgcHJ/dsWqBUKtHV1dWx/dfj+prj+poz0fp27qzfppXmzCkxNjZ9j1+9+hYsmPy2+/r6tkdET7V1jVzRN03S8cDNwIrKkAeIiPXAeoCenp7o7e2dirKqGh4eppP7r8f1Ncf1NWei9a1d275aqhkYGGbDht6p3ekE1Ktv69b27LeRD2P3AHPL5k9Myxoi6U3A14FPRMQ9EyvPzMya1UjQbwPmS5onaRawHNjSyMZT+68Af1NtOMfMzNqvbtBHxH5gFXAb8BiwKSJGJK2TdAGApNMl7QYuBL4oaSR1/yPgLOAySQ+kx2nteCJmZlZdQ2P0ETEEDFUsW1M2vY1iSKey3y3ALU3WaGZmTfBfxpqZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmGgp6Sf2SdkgalbS6yvqzJH1X0n5JyyrWrZD0RHqsaFXhZmbWmLpBL2kGcB1wHrAQGJC0sKLZD4HLgC9V9J0DfBJYAiwGPinp6ObLNjOzRjVyRb8YGI2IXRGxDxgElpY3iIinIuIh4LWKvucCd0TEWES8CNwB9LegbjMza5AiYvwGxVBMf0RckeYvBZZExKoqbW8EvhYRm9P8x4DDIuLTaf4vgJ9HxGcr+q0EVgJ0d3cvGhwcbPZ5TVqpVKKrq6tj+6/H9TXH9TVnovXt3NnGYqqYM6fE2Nj0PX716luwYPLb7uvr2x4RPdXWzZz8ZlsnItYD6wF6enqit7e3Y7UMDw/Tyf3X4/qa4/qaM9H61q5tXy3VDAwMs2FD79TudALq1bd1a3v228jQzR5gbtn8iWlZI5rpa2ZmLdBI0G8D5kuaJ2kWsBzY0uD2bwPOkXR0+hD2nLTMzMymSN2gj4j9wCqKgH4M2BQRI5LWSboAQNLpknYDFwJflDSS+o4Bn6J4s9gGrEvLzMxsijQ0Rh8RQ8BQxbI1ZdPbKIZlqvW9AbihiRrNzKwJ/stYM7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy1xDQS+pX9IOSaOSVldZP1vSxrT+XkknpeWHSrpJ0sOSHpP08RbXb2ZmddQNekkzgOuA84CFwICkhRXNLgdejIhTgGuAq9LyC4HZEfEOYBHwbw+8CZiZ2dRo5Ip+MTAaEbsiYh8wCCytaLMUuClNbwbOliQggCMkzQTeAOwDXmpJ5WZm1hBFxPgNpGVAf0RckeYvBZZExKqyNo+kNrvT/JPAEuAnwM3A2cDhwEciYn2VfawEVgJ0d3cvGhwcbMFTm5xSqURXV1fH9l+P62uO62vOROvbubONxVQxZ06JsbHpe/zq1bdgweS33dfXtz0ieqqtmzn5zTZkMfAq8BbgaODvJX0zInaVN0rhvx6gp6cnent721xWbcPDw3Ry//W4vua4vuZMtL61a9tXSzUDA8Ns2NA7tTudgHr1bd3anv02MnSzB5hbNn9iWla1TRqmORJ4AbgI+LuI+GVEPAd8B6j6jmNmZu3RSNBvA+ZLmidpFrAc2FLRZguwIk0vA+6MYkzoh8B7ACQdAZwBPN6Kws3MrDF1gz4i9gOrgNuAx4BNETEiaZ2kC1Kz64FjJI0CHwUOfAXzOqBL0gjFG8b/jIiHWv0kzMystobG6CNiCBiqWLambPoViq9SVvYrVVtuZmZTx38Za2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplrKOgl9UvaIWlU0uoq62dL2pjW3yvppLJ1vyXpbkkjkh6WdFgL6zczszrqBr2kGcB1wHnAQmBA0sKKZpcDL0bEKcA1wFWp70zgFuCDEXEq0Av8smXVm5lZXY1c0S8GRiNiV0TsAwaBpRVtlgI3penNwNmSBJwDPBQRDwJExAsR8WprSjczs0YoIsZvIC0D+iPiijR/KbAkIlaVtXkktdmd5p8ElgCXAIuANwPHAYMR8Zkq+1gJrATo7u5eNDg42IKnNjmlUomurq6O7b8e19cc19ecida3c2cbi6lizpwSY2PT9/jVq2/Bgslvu6+vb3tE9FRbN3Pym23ITOBdwOnAy8C3JG2PiG+VN4qI9cB6gJ6enujt7W1zWbUNDw/Tyf3X4/qa4/qaM9H61q5tXy3VDAwMs2FD79TudALq1bd1a3v220jQ7wHmls2fmJZVa7M7jcsfCbwA7AbuiojnASQNAe8EvkWb9PU1139gYOpPzolodX3tOrHMbPpoZIx+GzBf0jxJs4DlwJaKNluAFWl6GXBnFGNCtwHvkHR4egP4XeDR1pRuZmaNqHtFHxH7Ja2iCO0ZwA0RMSJpHXB/RGwBrgduljQKjFG8GRARL0q6muLNIoChiPh6m56LmZlV0dAYfUQMAUMVy9aUTb8CXFij7y0UX7E0M7MO8F/GmpllzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYaCnpJ/ZJ2SBqVtLrK+tmSNqb190o6qWL9WyWVJH2sRXWbmVmD6ga9pBnAdcB5wEJgQNLCimaXAy9GxCnANcBVFeuvBr7RfLlmZjZRjVzRLwZGI2JXROwDBoGlFW2WAjel6c3A2ZIEIOn9wPeBkZZUbGZmE9JI0J8APF02vzstq9omIvYDPwGOkdQF/AdgbfOlmpnZZCgixm8gLQP6I+KKNH8psCQiVpW1eSS12Z3mnwSWAKuB+yJik6QrgVJEfLbKPlYCKwG6u7sXDQ4OTvoJ7dw56a4AzJlTYmysq7mNtFGr61uwoGWbAqBUKtHVNX2Pn+trzkTra/b1OFGv99dvM6/Hvr6+7RHRU23dzAb67wHmls2fmJZVa7Nb0kzgSOAFirBfJukzwFHAa5JeiYhryztHxHpgPUBPT0/09vY2UFZ1a5v83WFgYJgNGya//3ZrdX1bt7ZsUwAMDw/TzL9fu7m+5ky0vmZfjxP1en/9tvr1eEAjQb8NmC9pHkWgLwcuqmizBVgB3A0sA+6M4leFdx9oUHZFfy1mZjZl6gZ9ROyXtAq4DZgB3BARI5LWAfdHxBbgeuBmSaPAGMWbgZmZTQONXNETEUPAUMWyNWXTrwAX1tnGlZOoz8zMmuS/jDUzy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLXUNBL6pe0Q9KopNVV1s+WtDGtv1fSSWn570naLunh9PM9La7fzMzqqBv0kmYA1wHnAQuBAUkLK5pdDrwYEacA1wBXpeXPA++LiHcAK4CbW1W4mZk1ppEr+sXAaETsioh9wCCwtKLNUuCmNL0ZOFuSIuJ7EfGjtHwEeIOk2a0o3MzMGqOIGL+BtAzoj4gr0vylwJKIWFXW5pHUZneafzK1eb5iOx+MiPdW2cdKYCVAd3f3osHBwUk/oZ07J90VgDlzSoyNdTW3kTZqdX0LFrRsUwCUSiW6uqbv8XN9zZlofc2+Hifq9f76beb12NfXtz0ieqqtmzn5zTZO0qkUwznnVFsfEeuB9QA9PT3R29s76X2tXTvprgAMDAyzYcPk999ura5v69aWbQqA4eFhmvn3azfX15yJ1tfs63GiXu+v31a/Hg9oZOhmDzC3bP7EtKxqG0kzgSOBF9L8icBXgA9ExJPNFmxmZhPTSNBvA+ZLmidpFrAc2FLRZgvFh60Ay4A7IyIkHQV8HVgdEd9pUc1mZjYBdYM+IvYDq4DbgMeATRExImmdpAtSs+uBYySNAh8FDnwFcxVwCrBG0gPp8eaWPwszM6upoTH6iBgChiqWrSmbfgW4sEq/TwOfbrJGMzNrgv8y1swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy9yU/OfgZrno62vv9gcGpv4/1J6I6V6fVecrejOzzDnozcwy11DQS+qXtEPSqKTVVdbPlrQxrb9X0kll6z6elu+QdG4LazczswbUDXpJM4DrgPOAhcCApIUVzS4HXoyIU4BrgKtS34XAcuBUoB/4fNqemZlNkUau6BcDoxGxKyL2AYPA0oo2S4Gb0vRm4GxJSssHI+IXEfF9YDRtz8zMpkgj37o5AXi6bH43sKRWm4jYL+knwDFp+T0VfU+o3IGklcDKNFuStKOh6ttgeJhjgec7tf96Wl2f1Kot/cq0Pn5M8/oOtvOv1V7v9TX5enxbrRXT4uuVEbEeWN/pOgAk3R8RPZ2uoxbX1xzX1xzX15xO1dfI0M0eYG7Z/IlpWdU2kmYCRwIvNNjXzMzaqJGg3wbMlzRP0iyKD1e3VLTZAqxI08uAOyMi0vLl6Vs584D5wH2tKd3MzBpRd+gmjbmvAm4DZgA3RMSIpHXA/RGxBbgeuFnSKDBG8WZAarcJeBTYD/xJRLzapufSKtNiCGkcrq85rq85rq85HalPxYW3mZnlyn8Za2aWOQe9mVnmDpqgl3SDpOckPVK27EpJeyQ9kB7n1+g77i0g2ljfxrLanpL0QI2+T0l6OLW7vw21zZW0VdKjkkYk/bu0fI6kOyQ9kX4eXaP/itTmCUkrqrVpU33/VdLjkh6S9BVJR9Xo39bjV6fGjp+D49Q2Lc6/tI/DJN0n6cFU49q0fF667cpoqndWjf5tvRXLOPXdmvb5SHqNH1qj/6tlx7ryyy7Ni4iD4gGcBbwTeKRs2ZXAx+r0mwE8CbwdmAU8CCycivoq1v83YE2NdU8Bx7bx2B0PvDNNvxHYSXE7jM8Aq9Py1cBVVfrOAXaln0en6aOnqL5zgJlp+VXV6puK41enxo6fg7Vqmy7nX9qHgK40fShwL3AGsAlYnpZ/AfhQlb4L0zGbDcxLx3LGFNV3flonYEO1+lKfUjuP30FzRR8Rd1F8I2iiGrkFRNPGq0+SgD+iOFGmXEQ8ExHfTdM/BR6j+Avn8ltf3AS8v0r3c4E7ImIsIl4E7qC471Hb64uI2yNif2p2D8XfcXTEOMewEW09B+vV1unzL9UVEVFKs4emRwDvobjtCtQ+B9t+K5Za9UXEUFoXFF8t78g5eNAE/ThWpV/tb6gx9FDtFhCNvkBb5d3AsxHxRI31AdwuabuK20m0jYo7k/42xRVLd0Q8k1b9GOiu0mVKj19FfeX+GPhGjW5Tdvygao3T5hyscfymxfknaUYaPnqO4oLhSWBv2Zt5reMyJcevsr6IuLds3aHApcDf1eh+mKT7Jd0j6f2tru1gD/q/Ak4GTgOeofj1dDoaYPyrqXdFxDsp7jD6J5LOakcRkrqALwN/FhEvla9LVywd/a5urfokfYLi7zhurdF1So5fjRqnzTk4zr/vtDj/IuLViDiN4qp4MfAb7djPZFXWJ+k3y1Z/HrgrIv6+Rve3RXFrhIuAv5R0citrO6iDPiKeTf84rwF/TfVf5zp6GwcVt5T4Q2BjrTYRsSf9fA74Cm24Q2i6IvkycGtE/O+0+FlJx6f1x1NcyVSakuNXoz4kXQb8PnBxejP6NVNx/GrVOF3OwXGO37Q4/yr2txfYCpwJHJVqhNrHZUpfw2X19QNI+iRwHPDRcfocOIa7gGGK36pa5qAO+gMhlfwB8EiVZo3cAqKd3gs8HhG7q62UdISkNx6YpvgAstrzmLQ0Rns98FhEXF22qvzWFyuAv63S/TbgHElHp2GJc9KyttcnqR/4c+CCiHi5Rt+2H786NXb8HBzn3xemwfmXtn2c0remJL0B+D2KzxK2Utx2BWqfg22/FUuN+h6XdAXF51QD6c28Wt+jJc1O08cC/5zibgKt085PeqfTg+JXz2eAX1KM0V0O3Aw8DDxEcTIcn9q+BRgq63s+xTcRngQ+MVX1peU3Ah+saPur+ii+ifFgeoy0oz7gXRTDMg8BD6TH+RS3ov4W8ATwTWBOat8D/I+y/n9M8QHYKPCvp7C+UYqx2QPLvtCJ41enxo6fg7Vqmy7nX9rPbwHfSzU+QvoGUNr/fenf+n8Bs9PyC4B1Zf0/kY7dDuC8Kaxvf9rvgeN6YPmvXiPA76Rz4MH08/JW1+dbIJiZZe6gHroxMzsYOOjNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy9z/B8C5FPdY0UE0AAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" + "metadata": { + "id": "oTWhQK1Aw-J7" + }, + "execution_count": 4, + "outputs": [] }, { - "data": { - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "n, bins, patches = plt.hist(data['age'], 4, density=True, facecolor='b', alpha=0.75)\n", - "\n", - "plt.title('Histogram of Age')\n", - "plt.grid(True)\n", - "plt.figure(figsize=(100,100), dpi=100)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgenderagetopicsigndatetextlabel
02059027male15StudentLeo14,May,2004Info has been found (+/- 100 pages,...[1.0, 0.0, 0.0, 0.0]
12059027male15StudentLeo13,May,2004These are the team members: Drewe...[1.0, 0.0, 0.0, 0.0]
22059027male15StudentLeo12,May,2004In het kader van kernfusie op aarde...[1.0, 0.0, 0.0, 0.0]
32059027male15StudentLeo12,May,2004testing!!! testing!!![1.0, 0.0, 0.0, 0.0]
43581210male33InvestmentBankingAquarius11,June,2004Thanks to Yahoo!'s Toolbar I can ...[0.0, 0.0, 1.0, 0.0]
\n", - "
" + "cell_type": "code", + "execution_count": 97, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aCSHYmqxnJmd", + "outputId": "6ed62e92-6bc5-49b5-af9d-dc8a9f0e0528" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n", + "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.47)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n", + "Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.5)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n" + ] + } ], - "text/plain": [ - " id gender age topic sign date \\\n", - "0 2059027 male 15 Student Leo 14,May,2004 \n", - "1 2059027 male 15 Student Leo 13,May,2004 \n", - "2 2059027 male 15 Student Leo 12,May,2004 \n", - "3 2059027 male 15 Student Leo 12,May,2004 \n", - "4 3581210 male 33 InvestmentBanking Aquarius 11,June,2004 \n", - "\n", - " text label \n", - "0 Info has been found (+/- 100 pages,... [1.0, 0.0, 0.0, 0.0] \n", - "1 These are the team members: Drewe... [1.0, 0.0, 0.0, 0.0] \n", - "2 In het kader van kernfusie op aarde... [1.0, 0.0, 0.0, 0.0] \n", - "3 testing!!! testing!!! [1.0, 0.0, 0.0, 0.0] \n", - "4 Thanks to Yahoo!'s Toolbar I can ... [0.0, 0.0, 1.0, 0.0] " + "source": [ + "!pip3 install transformers" ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"\"\"\n", - "1 - 22 -> 1 klasa\n", - "23 - 31 -> 2 klasa\n", - "32 - 39 -> 3 klasa \n", - "40 - 48 -> 4 klasa\n", - "\"\"\"\n", - "\n", - "def mapAgeToClass(value: pd.DataFrame) -> int:\n", - " if(value['age'] <=22):\n", - " return 1\n", - " elif(value['age'] > 22 and value['age'] <= 31):\n", - " return 2\n", - " elif(value['age'] > 31 and value['age'] <= 39):\n", - " return 3\n", - " else:\n", - " return 4\n", - "\n", - "def mapAgeToClass2(value: pd.DataFrame) -> int:\n", - " if(value['age'] <=22):\n", - " return [1.0,0.0,0.0,0.0]\n", - " elif(value['age'] > 22 and value['age'] <= 31):\n", - " return [0.0,1.0,0.0,0.0]\n", - " elif(value['age'] > 31 and value['age'] <= 39):\n", - " return [0.0,0.0,1.0,0.0]\n", - " else:\n", - " return [0.0,0.0,0.0,1.0]\n", - " \n", - "data['label'] = data.apply(lambda row: mapAgeToClass2(row), axis=1)\n", - "data.head()\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "X = list(data['text'])\n", - "Y = list(data['label'])\n", - "if (torch.cuda.is_available()):\n", - " device = \"cuda:0\"\n", - " torch.cuda.empty_cache()\n", - "else:\n", - " device = \"cpu\"\n", - "device = \"cpu\"\n", - "\n", - "# model = model.to(device)\n", - "\n", - "X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2)\n", - "X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=512)\n", - "# .to(device)\n", - "X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=512)\n", - "# .to(device)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "class Dataset(torch.utils.data.Dataset):\n", - " def __init__(self, encodings, labels=None):\n", - " self.encodings = encodings\n", - " self.labels = labels\n", - "\n", - " def __getitem__(self, idx):\n", - " item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n", - " if self.labels:\n", - " item[\"labels\"] = torch.tensor(self.labels[idx])\n", - " return item\n", - "\n", - " def __len__(self):\n", - " return len(self.encodings[\"input_ids\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "train_dataset = Dataset(X_train_tokenized, y_train)\n", - "val_dataset = Dataset(X_val_tokenized, y_val)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "def compute_metrics(p):\n", - " pred, labels = p\n", - " pred = np.argmax(pred, axis=1)\n", - "\n", - " accuracy = accuracy_score(y_true=labels, y_pred=pred)\n", - " recall = recall_score(y_true=labels, y_pred=pred)\n", - " precision = precision_score(y_true=labels, y_pred=pred)\n", - " f1 = f1_score(y_true=labels, y_pred=pred)\n", - "\n", - " return {\"accuracy\": accuracy, \"precision\": precision, \"recall\": recall, \"f1\": f1}\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ + }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "PyTorch: setting up devices\n", - "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n" - ] - } - ], - "source": [ - "args = TrainingArguments(\n", - " output_dir=\"output\",\n", - " evaluation_strategy=\"steps\",\n", - " eval_steps=500,\n", - " per_device_train_batch_size=8,\n", - " per_device_eval_batch_size=8,\n", - " num_train_epochs=3,\n", - " seed=0,\n", - " load_best_model_at_end=True,\n", - " no_cuda=True\n", - ")\n", - "trainer = Trainer(\n", - " model=model,\n", - " args=args,\n", - " train_dataset=train_dataset,\n", - " eval_dataset=val_dataset,\n", - " compute_metrics=compute_metrics,\n", - " callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 98, + "metadata": { + "id": "Jk99LQjzmzvw" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n", + "import torch\n", + "from transformers import TrainingArguments, Trainer\n", + "from transformers import BertTokenizer, BertForSequenceClassification\n", + "from transformers import EarlyStoppingCallback\n", + "import matplotlib.pyplot as plt" + ] + }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ramon/projects/projekt_glebokie/venv/lib/python3.7/site-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", - " FutureWarning,\n", - "***** Running training *****\n", - " Num examples = 80\n", - " Num Epochs = 3\n", - " Instantaneous batch size per device = 8\n", - " Total train batch size (w. parallel, distributed & accumulation) = 8\n", - " Gradient Accumulation steps = 1\n", - " Total optimization steps = 30\n" - ] + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wb-zBvXlaTAO", + "outputId": "62a128ce-8b64-404a-a462-b9c92e350246" + }, + "execution_count": 99, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "jFTxtzzKmzv1", + "outputId": "afc4b505-8f43-484b-dbad-20f9e97fe37d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n", + "\n", + "\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + "Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 31753: field larger than field limit (131072)\n", + "Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 275624: field larger than field limit (131072)\n", + "Skipping line 302668: field larger than field limit (131072)\n", + "Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 384761: field larger than field limit (131072)\n", + "Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 401260: field larger than field limit (131072)\n", + "Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 465419: field larger than field limit (131072)\n", + "Skipping line 466152: field larger than field limit (131072)\n", + "Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 529874: field larger than field limit (131072)\n", + "Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 589855: field larger than field limit (131072)\n", + "Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 632882: field larger than field limit (131072)\n", + "Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 658667: field larger than field limit (131072)\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgenderagetopicsigndatetext
02059027male15StudentLeo14,May,2004Info has been found (+/- 100 pages,...
12059027male15StudentLeo13,May,2004These are the team members: Drewe...
22059027male15StudentLeo12,May,2004In het kader van kernfusie op aarde...
32059027male15StudentLeo12,May,2004testing!!! testing!!!
43581210male33InvestmentBankingAquarius11,June,2004Thanks to Yahoo!'s Toolbar I can ...
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " id ... text\n", + "0 2059027 ... Info has been found (+/- 100 pages,...\n", + "1 2059027 ... These are the team members: Drewe...\n", + "2 2059027 ... In het kader van kernfusie op aarde...\n", + "3 2059027 ... testing!!! testing!!! \n", + "4 3581210 ... Thanks to Yahoo!'s Toolbar I can ...\n", + "\n", + "[5 rows x 7 columns]" + ] + }, + "metadata": {}, + "execution_count": 100 + } + ], + "source": [ + "data_path = 'drive/MyDrive/blogtext.csv'\n", + "data = pd.read_csv(data_path, error_bad_lines=False, engine='python')\n", + "data = data[:data_amount]\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": { + "id": "84JKE7annp0B" + }, + "outputs": [], + "source": [ + "if (torch.cuda.is_available()):\n", + " device = \"cuda:0\"\n", + " torch.cuda.empty_cache()\n", + "else:\n", + " device = \"cpu\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KbKkY5Dbmzv3" + }, + "source": [ + "# Model typu encoder (BertForSequenceClassification)" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "s4sgNo1rmzv5", + "outputId": "fbce79df-ecf4-4b7f-bc91-b430b7747ccc" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "loading file https://huggingface.co/bert-base-uncased/resolve/main/vocab.txt from cache at /root/.cache/huggingface/transformers/45c3f7a79a80e1cf0a489e5c62b43f173c15db47864303a55d623bb3c96f72a5.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99\n", + "loading file https://huggingface.co/bert-base-uncased/resolve/main/added_tokens.json from cache at None\n", + "loading file https://huggingface.co/bert-base-uncased/resolve/main/special_tokens_map.json from cache at None\n", + "loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/c1d7f0a763fb63861cc08553866f1fc3e5a6f4f07621be277452d26d71303b7e.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79\n", + "loading file https://huggingface.co/bert-base-uncased/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/534479488c54aeaf9c3406f647aa2ec13648c06771ffe269edabebd4c412da1d.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4\n", + "loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e\n", + "Model config BertConfig {\n", + " \"_name_or_path\": \"bert-base-uncased\",\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"classifier_dropout\": null,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"transformers_version\": \"4.16.2\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 30522\n", + "}\n", + "\n", + "loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e\n", + "Model config BertConfig {\n", + " \"architectures\": [\n", + " \"BertForMaskedLM\"\n", + " ],\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"classifier_dropout\": null,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": \"LABEL_0\",\n", + " \"1\": \"LABEL_1\",\n", + " \"2\": \"LABEL_2\",\n", + " \"3\": \"LABEL_3\"\n", + " },\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"LABEL_0\": 0,\n", + " \"LABEL_1\": 1,\n", + " \"LABEL_2\": 2,\n", + " \"LABEL_3\": 3\n", + " },\n", + " \"layer_norm_eps\": 1e-12,\n", + " \"max_position_embeddings\": 512,\n", + " \"model_type\": \"bert\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 0,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"problem_type\": \"multi_label_classification\",\n", + " \"transformers_version\": \"4.16.2\",\n", + " \"type_vocab_size\": 2,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 30522\n", + "}\n", + "\n", + "loading weights file https://huggingface.co/bert-base-uncased/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/a8041bf617d7f94ea26d15e218abd04afc2004805632abc0ed2066aa16d50d04.faf6ea826ae9c5867d12b22257f9877e6b8367890837bd60f7c54a29633f7f2f\n", + "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n", + "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ], + "source": [ + "model_name = 'bert-base-uncased'\n", + "tokenizer = BertTokenizer.from_pretrained(model_name)\n", + "model = BertForSequenceClassification.from_pretrained(model_name, problem_type=\"multi_label_classification\", num_labels=4).to(device)" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 299 + }, + "id": "mzfWUCNomzv6", + "outputId": "190e4b3d-cb6e-4ca9-90cc-aa0816834b19" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAaIElEQVR4nO3df5DU933f8edLYJDla1CRlKsjsMAG6iKnIdUJ5MZ17qTYPiVxzm5RykUmeAaVOg6ZJK3i4ExCJMXpVJlU1B3LSS4VDUUqhwY1ztU+j+IadlynMgYiKQbZyAeSRsiyVThkeS0jjPXuH/thsl7t3S13++O+H16PmRt9f3x2v6/v1+fXfvnc7Z4iAjMzy9clnQ5gZmat5aI3M8uci97MLHMuejOzzLnozcwy56I3M8uci946TtIRSb2dztFJkt4v6VlJZUk/2ek8lhcXvbWUpKcl/UzNtg9K+uL59Yi4NiJKUzzPEkkhaW6LonbaHwObI6IrIh6tN0AVxyU90eZsVnAuejNgFryAXAMcmWLMO4EfBd4s6frWR7JcuOit46rv+iWtlnRQ0kuSviXpnjTsC+m/L6bpjbdLukTS70p6RtILkv67pAVVz/vLad8pSb9Xc5w7JO2RdL+kl4APpmM/IulFSc9L+oSkeVXPF5I+LOnrkr4j6Q8kvUXS/015H6weX3OOdbNKmi+pDMwBHpd0bJJLtQH4K2A0LVc//1JJX0i5/rekeyXdX7X/hpTzRUmPX+xTZRcbF73NNh8HPh4RPwK8BXgwbX9n+u/laXrjEeCD6asPeDPQBXwCQNJK4JPArcAbgQXA1TXHGgD2AJcDDwA/AH4TuBJ4O3AT8OGax7wHuA64AfgIMAR8AFgMvA0YnOC86maNiFcioiuN+YmIeEu9B0u6DFibcj4ArKt5UfkfwJeBK4A7gPVVj70a+AzwMWAhcDvwkKSrJshqmXHRWzt8Kt1JvijpRSoFPJHvA8skXRkR5Yj40iRjbwXuiYjjEVEGPkqlAOdSKcX/FRFfjIizwFag9oOdHomIT0XEqxHxvYg4FBFfiohzEfE08GfAT9c85o8i4qWIOAIcBv46Hf/bwGeBiX6QOlnWRvxL4BXgr6mU9uuAnwOQ9CbgemBrRJyNiC8CI1WP/QAwGhGj6Vw/BxwEfrbBY1vBueitHd4XEZef/+K1d8nVNgIrgK9JOiDp5ycZ+2PAM1XrzwBzge6079nzOyLiZeBUzeOfrV6RtELSpyV9M03n/Acqd/fVvlW1/L06613UN1nWRmwAHkwvQmeAh/j76ZsfA8bTOZ5XfW7XALfUvNi+g8q/dOwi0OkfQJn9kIj4OjAo6RIqd7F7JF3Ba+/GAb5BpcTOexNwjkr5Pg/84/M7JL2eyrTGDx2uZv1PgEeBwYj4jqTfoPIvg2aYLOukJC0CbgRWS/pXafNlwKWSrqRyrgslXVZV9ournuJZYGdE/JsZnoMVlO/obVaR9AFJV0XEq8CLafOrwP9L/31z1fBdwG+mH0R2UbkD3x0R56jMvb9X0j9Pc9l3AJri8P8AeAkoS3or8CvNOq8psk5lPfAklReuVelrBXCCyovSM1SmYu6QNE/S24H3Vj3+firX4j2S5ki6VFJvegGxi4CL3mabfuBI+k2UjwPr0vz5y8AfAn+Tph9uALYDO6n8Rs5TwBng1wDSHPqvAcNU7njLwAtU5rkncjvwS8B3gD8HdjfxvCbM2oANwCcj4pvVX8Cf8vfTN7dS+QHyKSo/dN1NOteIeJbKD55/h8oL5rPAb+H//1805D88YheDdBf9IrA8Ip7qdJ5Wk7Qb+FpE/H6ns1jn+RXdsiXpvZIuk/QGKu88/QrwdGdTtYak69Pv9F8iqZ/KHfynOp3LZgcXveVsgMoPQb8BLKcyDZTrP2H/EVCiMkX1X4BfmeijFOzi46kbM7PM+Y7ezCxzs+736K+88spYsmRJS4/x3e9+lze84Q0tPUYrFTl/kbNDsfMXOTsUO387sh86dOhkRNT9WItZV/RLlizh4MGDLT1GqVSit7e3pcdopSLnL3J2KHb+ImeHYudvR3ZJz0y0z1M3ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZm3XvjDWbzZ58Eu68s9MppmdwsP3Z9+1r7/GsPt/Rm5llzkVvZpY5F72ZWeZc9GZmmXPRm5llzkVvZpY5F72ZWeZc9GZmmXPRm5llzkVvZpY5F72ZWeYaKnpJ/ZKOShqTtKXO/vmSdqf9+yUtSdtvlfRY1derklY19xTMzGwyUxa9pDnAvcDNwEpgUNLKmmEbgdMRsQzYBtwNEBEPRMSqiFgFrAeeiojHmnkCZmY2uUbu6FcDYxFxPCLOAsPAQM2YAWBHWt4D3CRJNWMG02PNzKyNFBGTD5DWAv0RcVtaXw+siYjNVWMOpzEn0vqxNOZk1ZhjwEBEHK5zjE3AJoDu7u7rhodb+3pQLpfp6upq6TFaqcj5i5wd4OTJMuPjxcy/cGH7s69Y0bznKvL3Tjuy9/X1HYqInnr72vJ59JLWAC/XK3mAiBgChgB6enqit7e3pXlKpRKtPkYrFTl/kbMDDA2V2LWrt9MxpmVwsP3Zm/l59EX+3ul09kambp4DFletL0rb6o6RNBdYAJyq2r8O2DX9mGZmNl2NFP0BYLmkpZLmUSntkZoxI8CGtLwW2BtpTkjSJcAv4vl5M7OOmHLqJiLOSdoMPAzMAbZHxBFJdwEHI2IEuA/YKWkMGKfyYnDeO4FnI+J48+ObmdlUGpqjj4hRYLRm29aq5TPALRM8tgTcMP2IZmY2E35nrJlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZa6joJfVLOippTNKWOvvnS9qd9u+XtKRq3z+V9IikI5K+IunS5sU3M7OpTFn0kuYA9wI3AyuBQUkra4ZtBE5HxDJgG3B3euxc4H7gQxFxLdALfL9p6c3MbEqN3NGvBsYi4nhEnAWGgYGaMQPAjrS8B7hJkoB3A38XEY8DRMSpiPhBc6KbmVkjGin6q4Fnq9ZPpG11x0TEOeDbwBXACiAkPSzpbyV9ZOaRzczsQsxtw/O/A7geeBn4vKRDEfH56kGSNgGbALq7uymVSi0NVS6XW36MVipy/iJnB1i4sMzgYKnTMaalE9mb+T91kb93Op29kaJ/Dlhctb4obas35kSal18AnKJy9/+FiDgJIGkU+GfADxV9RAwBQwA9PT3R29t7wSdyIUqlEq0+RisVOX+RswMMDZXYtau30zGmZXCw/dn37WvecxX5e6fT2RuZujkALJe0VNI8YB0wUjNmBNiQltcCeyMigIeBH5d0WXoB+GngieZENzOzRkx5Rx8R5yRtplLac4DtEXFE0l3AwYgYAe4DdkoaA8apvBgQEacl3UPlxSKA0Yj4TIvOxczM6mhojj4iRoHRmm1bq5bPALdM8Nj7qfyKpZmZdYDfGWtmljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZa6joJfVLOippTNKWOvvnS9qd9u+XtCRtXyLpe5IeS19/2tz4ZmY2lSn/OLikOcC9wLuAE8ABSSMR8UTVsI3A6YhYJmkdcDfwr9O+YxGxqsm5zcysQY3c0a8GxiLieEScBYaBgZoxA8COtLwHuEmSmhfTzMymSxEx+QBpLdAfEbel9fXAmojYXDXmcBpzIq0fA9YAXcAR4EngJeB3I+L/1DnGJmATQHd393XDw8NNOLWJlctlurq6WnqMVipy/iJnBzh5ssz4eDHzL1zY/uwrVjTvuYr8vdOO7H19fYcioqfevimnbmboeeBNEXFK0nXApyRdGxEvVQ+KiCFgCKCnpyd6e3tbGqpUKtHqY7RSkfMXOTvA0FCJXbt6Ox1jWgYH2599377mPVeRv3c6nb2RqZvngMVV64vStrpjJM0FFgCnIuKViDgFEBGHgGNAE1/jzcxsKo0U/QFguaSlkuYB64CRmjEjwIa0vBbYGxEh6ar0w1wkvRlYDhxvTnQzM2vElFM3EXFO0mbgYWAOsD0ijki6CzgYESPAfcBOSWPAOJUXA4B3AndJ+j7wKvChiBhvxYmYmVl9Dc3RR8QoMFqzbWvV8hngljqPewh4aIYZzcxsBvzOWDOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy11DRS+qXdFTSmKQtdfbPl7Q77d8vaUnN/jdJKku6vTmxzcysUVMWvaQ5wL3AzcBKYFDSypphG4HTEbEM2AbcXbP/HuCzM49rZmYXqpE7+tXAWEQcj4izwDAwUDNmANiRlvcAN0kSgKT3AU8BR5oT2czMLoQiYvIB0lqgPyJuS+vrgTURsblqzOE05kRaPwasAc4AnwPeBdwOlCPij+scYxOwCaC7u/u64eHhJpzaxMrlMl1dXS09RisVOX+RswOcPFlmfLyY+RcubH/2FSua91xF/t5pR/a+vr5DEdFTb9/clh4Z7gC2RUQ53eDXFRFDwBBAT09P9Pb2tjRUqVSi1cdopSLnL3J2gKGhErt29XY6xrQMDrY/+759zXuuIn/vdDp7I0X/HLC4an1R2lZvzAlJc4EFwCkqd/VrJf0RcDnwqqQzEfGJGSc3M7OGNFL0B4DlkpZSKfR1wC/VjBkBNgCPAGuBvVGZE/oX5wdIuoPK1I1L3sysjaYs+og4J2kz8DAwB9geEUck3QUcjIgR4D5gp6QxYJzKi4GZmc0CDc3RR8QoMFqzbWvV8hnglime445p5DMzsxnyO2PNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscw0VvaR+SUcljUnaUmf/fEm70/79kpak7aslPZa+Hpf0/ubGNzOzqUxZ9JLmAPcCNwMrgUFJK2uGbQROR8QyYBtwd9p+GOiJiFVAP/Bnkhr6g+RmZtYcjdzRrwbGIuJ4RJwFhoGBmjEDwI60vAe4SZIi4uWIOJe2XwpEM0KbmVnjFDF590paC/RHxG1pfT2wJiI2V405nMacSOvH0piTktYA24FrgPUR8Zd1jrEJ2ATQ3d193fDwcFNObiLlcpmurq6WHqOVipy/yNkBTp4sMz5ezPwLF7Y/+4oVzXuuIn/vtCN7X1/foYjoqbev5dMoEbEfuFbSPwF2SPpsRJypGTMEDAH09PREb29vSzOVSiVafYxWKnL+ImcHGBoqsWtXb6djTMvgYPuz79vXvOcq8vdOp7M3MnXzHLC4an1R2lZ3TJqDXwCcqh4QEV8FysDbphvWzMwuXCNFfwBYLmmppHnAOmCkZswIsCEtrwX2RkSkx8wFkHQN8Fbg6aYkNzOzhkw5dRMR5yRtBh4G5gDbI+KIpLuAgxExAtwH7JQ0BoxTeTEAeAewRdL3gVeBD0fEyVaciJmZ1dfQHH1EjAKjNdu2Vi2fAW6p87idwM4ZZjQzsxnwO2PNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swscy56M7PMuejNzDLnojczy5yL3swsc/6zfhe5vr72Hm9wEO68s73HbKbBwU4nMLtwvqM3M8uci97MLHPZTd00MhWRw/RBkfObWXv5jt7MLHMuejOzzLnozcwy11DRS+qXdFTSmKQtdfbPl7Q77d8vaUna/i5JhyR9Jf33xubGNzOzqUxZ9JLmAPcCNwMrgUFJK2uGbQROR8QyYBtwd9p+EnhvRPw4sAH//Vgzs7Zr5I5+NTAWEccj4iwwDAzUjBkAdqTlPcBNkhQRj0bEN9L2I8DrJc1vRnAzM2uMImLyAdJaoD8ibkvr64E1EbG5aszhNOZEWj+WxpyseZ4PRcTP1DnGJmATQHd393XDw8PTPqEnn5x6zMKFZcbHu6Z9jE4rcv4iZ4di5+9E9hUrmvdc5XKZrq5iXvt2ZO/r6zsUET319rXl9+glXUtlOufd9fZHxBAwBNDT0xO9vb3TPlYjv18+OFhi167pH6PTipy/yNmh2Pk7kX3fvuY9V6lUYibd0Emdzt7I1M1zwOKq9UVpW90xkuYCC4BTaX0R8JfAL0fEsZkGNjOzC9NI0R8AlktaKmkesA4YqRkzQuWHrQBrgb0REZIuBz4DbImIv2lWaDMza9yURR8R54DNwMPAV4EHI+KIpLsk/UIadh9whaQx4N8B538FczOwDNgq6bH09aNNPwszM5tQQ3P0ETEKjNZs21q1fAa4pc7jPgZ8bIYZzcxsBvzOWDOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy56I3M8uci97MLHMuejOzzLnozcwy11DRS+qXdFTSmKQtdfbPl7Q77d8vaUnafoWkfZLKkj7R3OhmZtaIKYte0hzgXuBmYCUwKGllzbCNwOmIWAZsA+5O288Avwfc3rTEZmZ2QRq5o18NjEXE8Yg4CwwDAzVjBoAdaXkPcJMkRcR3I+KLVArfzMw6QBEx+QBpLdAfEbel9fXAmojYXDXmcBpzIq0fS2NOpvUPAj3Vj6k5xiZgE0B3d/d1w8PD0z6hJ5+ceszChWXGx7umfYxOK3L+ImeHYufvRPYVK5r3XOVyma6uYl77dmTv6+s7FBE99fbNbemRGxQRQ8AQQE9PT/T29k77ue68c+oxg4Mldu2a/jE6rcj5i5wdip2/E9n37Wvec5VKJWbSDZ3U6eyNTN08ByyuWl+UttUdI2kusAA41YyAZmY2M40U/QFguaSlkuYB64CRmjEjwIa0vBbYG1PNCZmZWVtMOXUTEeckbQYeBuYA2yPiiKS7gIMRMQLcB+yUNAaMU3kxAEDS08CPAPMkvQ94d0Q80fxTMTOzehqao4+IUWC0ZtvWquUzwC0TPHbJDPKZmdkM+Z2xZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZmxV/YcrM8tTX17znGhxs7C/IzUaNZm/mX+Sq5jt6M7PMuejNzDLnojczy5yL3swscy56M7PMNVT0kvolHZU0JmlLnf3zJe1O+/dLWlK176Np+1FJ72ledDMza8SURS9pDnAvcDOwEhiUtLJm2EbgdEQsA7YBd6fHrgTWAdcC/cAn0/OZmVmbNHJHvxoYi4jjEXEWGAYGasYMADvS8h7gJklK24cj4pWIeAoYS89nZmZt0sgbpq4Gnq1aPwGsmWhMRJyT9G3girT9SzWPvbr2AJI2AZvSalnS0YbST1OpxJXAyVYeo5WKnL/I2aHY+YucHYqdv9Hs0owOc81EO2bFO2MjYggYatfxJB2MiJ52Ha/Zipy/yNmh2PmLnB2Knb/T2RuZunkOWFy1vihtqztG0lxgAXCqwceamVkLNVL0B4DlkpZKmkflh6sjNWNGgA1peS2wNyIibV+XfitnKbAc+HJzopuZWSOmnLpJc+6bgYeBOcD2iDgi6S7gYESMAPcBOyWNAeNUXgxI4x4EngDOAb8aET9o0blciLZNE7VIkfMXOTsUO3+Rs0Ox83c0uyo33mZmliu/M9bMLHMuejOzzGVf9JK2S3pB0uGqbXdIek7SY+nrZzuZcSKSFkvaJ+kJSUck/XravlDS5yR9Pf33H3Y6az2T5J/111/SpZK+LOnxlP3OtH1p+piPsfSxH/M6nbWeSfL/haSnqq79qk5nnYikOZIelfTptF6Ia39enfwdu/bZFz3wF1Q+fqHWtohYlb5G25ypUeeAfx8RK4EbgF9NHyuxBfh8RCwHPp/WZ6OJ8sPsv/6vADdGxE8Aq4B+STdQ+XiPbenjPk5T+fiP2Wii/AC/VXXtH+tcxCn9OvDVqvWiXPvzavNDh6599kUfEV+g8ptAhRMRz0fE36bl71D5prmaH/7IiR3A+zqTcHKT5J/1oqKcVl+XvgK4kcrHfMDsvvYT5S8ESYuAnwP+a1oXBbn28Nr8nZZ90U9is6S/S1M7s3Lqo1r6RNCfBPYD3RHxfNr1TaC7Q7EaVpMfCnD90z+9HwNeAD4HHANejIhzaUjdj/SYLWrzR8T5a/+H6dpvkzS/gxEn85+BjwCvpvUrKNC157X5z+vItb9Yi/5PgLdQ+Sft88B/6mycyUnqAh4CfiMiXqrel96YNqvv1OrkL8T1j4gfRMQqKu/oXg28tcORLkhtfklvAz5K5TyuBxYCv93BiHVJ+nnghYg41Oks0zFJ/o5d+4uy6CPiW+n/BK8Cf84s/kRNSa+jUpIPRMT/TJu/JemNaf8bqdyxzUr18hfp+gNExIvAPuDtwOXpYz6gIB/pUZW/P02nRUS8Avw3Zue1/yngFyQ9TeXTcm8EPk5xrv1r8ku6v5PX/qIs+vMlmbwfODzR2E5K85L3AV+NiHuqdlV/5MQG4K/ana0RE+UvwvWXdJWky9Py64F3UfkZwz4qH/MBs/va18v/taobBFGZ45511z4iPhoRiyJiCZV32e+NiFspyLWfIP8HOnntZ8WnV7aSpF1AL3ClpBPA7wO96VebAnga+LcdCzi5nwLWA19Jc60AvwP8R+BBSRuBZ4Bf7FC+qUyUf7AA1/+NwA5V/lDOJcCDEfFpSU8Aw5I+BjxK5YVsNpoo/15JVwECHgM+1MmQF+i3Kca1n8gDnbr2/ggEM7PMXZRTN2ZmFxMXvZlZ5lz0ZmaZc9GbmWXORW9mljkXvZlZ5lz0ZmaZ+//gbHXWr/GHYAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {} + } + ], + "source": [ + "n, bins, patches = plt.hist(data['age'], 4, density=True, facecolor='b', alpha=0.75)\n", + "\n", + "plt.title('Histogram of Age')\n", + "plt.grid(True)\n", + "plt.figure(figsize=(100,100), dpi=100)\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "v6WbfLpimzv7", + "outputId": "309e9c1b-5730-4994-cd9a-59078ba1a2b2" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgenderagetopicsigndatetextlabel
02059027male15StudentLeo14,May,2004Info has been found (+/- 100 pages,...[1.0, 0.0, 0.0, 0.0]
12059027male15StudentLeo13,May,2004These are the team members: Drewe...[1.0, 0.0, 0.0, 0.0]
22059027male15StudentLeo12,May,2004In het kader van kernfusie op aarde...[1.0, 0.0, 0.0, 0.0]
32059027male15StudentLeo12,May,2004testing!!! testing!!![1.0, 0.0, 0.0, 0.0]
43581210male33InvestmentBankingAquarius11,June,2004Thanks to Yahoo!'s Toolbar I can ...[0.0, 0.0, 1.0, 0.0]
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " id ... label\n", + "0 2059027 ... [1.0, 0.0, 0.0, 0.0]\n", + "1 2059027 ... [1.0, 0.0, 0.0, 0.0]\n", + "2 2059027 ... [1.0, 0.0, 0.0, 0.0]\n", + "3 2059027 ... [1.0, 0.0, 0.0, 0.0]\n", + "4 3581210 ... [0.0, 0.0, 1.0, 0.0]\n", + "\n", + "[5 rows x 8 columns]" + ] + }, + "metadata": {}, + "execution_count": 104 + } + ], + "source": [ + "\"\"\"\n", + "1 - 22 -> 1 klasa\n", + "23 - 31 -> 2 klasa\n", + "32 - 39 -> 3 klasa \n", + "40 - 48 -> 4 klasa\n", + "\"\"\"\n", + "def mapAgeToClass2(value: pd.DataFrame) -> int:\n", + " if(value['age'] <=22):\n", + " return [1.0,0.0,0.0,0.0]\n", + " elif(value['age'] > 22 and value['age'] <= 31):\n", + " return [0.0,1.0,0.0,0.0]\n", + " elif(value['age'] > 31 and value['age'] <= 39):\n", + " return [0.0,0.0,1.0,0.0]\n", + " else:\n", + " return [0.0,0.0,0.0,1.0]\n", + " \n", + "data['label'] = data.apply(lambda row: mapAgeToClass2(row), axis=1)\n", + "data.head()\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": { + "id": "2CVdpLTWmzv8" + }, + "outputs": [], + "source": [ + "X = list(data['text'])\n", + "Y = list(data['label'])\n", + "\n", + "X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2)\n", + "X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=512)\n", + "X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=512)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": { + "id": "WAuTGUvKmzv9" + }, + "outputs": [], + "source": [ + "class Dataset(torch.utils.data.Dataset):\n", + " def __init__(self, encodings, labels=None):\n", + " self.encodings = encodings\n", + " self.labels = labels\n", + "\n", + " def __getitem__(self, idx):\n", + " item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n", + " if self.labels:\n", + " item[\"labels\"] = torch.tensor(self.labels[idx])\n", + " return item\n", + "\n", + " def __len__(self):\n", + " return len(self.encodings[\"input_ids\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": { + "id": "PKJ5TCMTmzv-" + }, + "outputs": [], + "source": [ + "train_dataset = Dataset(X_train_tokenized, y_train)\n", + "val_dataset = Dataset(X_val_tokenized, y_val)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": { + "id": "QDQ0m8Iomzv_" + }, + "outputs": [], + "source": [ + "def compute_metrics(p):\n", + " pred, labels = p\n", + " pred = np.argmax(pred, axis=1)\n", + " labels = np.argmax(labels, axis=1)\n", + "\n", + " accuracy = accuracy_score(y_true=labels, y_pred=pred)\n", + " recall = recall_score(y_true=labels, y_pred=pred, average='micro')\n", + " precision = precision_score(y_true=labels, y_pred=pred, average='micro')\n", + " f1 = f1_score(y_true=labels, y_pred=pred, average='micro')\n", + "\n", + " return {\"accuracy\": accuracy, \"precision\": precision, \"recall\": recall, \"f1\": f1}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": { + "id": "5gxNl9bvmzwB", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "13e561ad-9da6-4bde-b823-11a7ca7e6184" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "PyTorch: setting up devices\n", + "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n" + ] + } + ], + "source": [ + "args = TrainingArguments(\n", + " output_dir=\"output\",\n", + " evaluation_strategy=\"steps\",\n", + " eval_steps=100,\n", + " per_device_train_batch_size=8,\n", + " per_device_eval_batch_size=8,\n", + " num_train_epochs=3,\n", + " seed=0,\n", + " load_best_model_at_end=True,\n", + ")\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=args,\n", + " train_dataset=train_dataset,\n", + " eval_dataset=val_dataset,\n", + " compute_metrics=compute_metrics,\n", + " callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "dZ8FtrmnmzwB", + "outputId": "5cf81705-3255-4e0c-ee0e-fea255fe0c66" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " FutureWarning,\n", + "***** Running training *****\n", + " Num examples = 4000\n", + " Num Epochs = 3\n", + " Instantaneous batch size per device = 8\n", + " Total train batch size (w. parallel, distributed & accumulation) = 8\n", + " Gradient Accumulation steps = 1\n", + " Total optimization steps = 1500\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [1100/1500 12:04 < 04:23, 1.52 it/s, Epoch 2/3]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation LossAccuracyPrecisionRecallF1
100No log0.3084950.7210000.7210000.7210000.721000
200No log0.2679070.7930000.7930000.7930000.793000
300No log0.2460320.7860000.7860000.7860000.786000
400No log0.2359760.7960000.7960000.7960000.796000
5000.2970000.2170700.8300000.8300000.8300000.830000
6000.2970000.2322440.8280000.8280000.8280000.828000
7000.2970000.1988910.8530000.8530000.8530000.853000
8000.2970000.2028870.8510000.8510000.8510000.851000
9000.2970000.2287510.8470000.8470000.8470000.847000
10000.1537000.2216750.8500000.8500000.8500000.850000
11000.1537000.2182990.8660000.8660000.8660000.866000

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "Saving model checkpoint to output/checkpoint-500\n", + "Configuration saved in output/checkpoint-500/config.json\n", + "Model weights saved in output/checkpoint-500/pytorch_model.bin\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "Saving model checkpoint to output/checkpoint-1000\n", + "Configuration saved in output/checkpoint-1000/config.json\n", + "Model weights saved in output/checkpoint-1000/pytorch_model.bin\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "\n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "Loading best model from output/checkpoint-500 (score: 0.21706973016262054).\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "TrainOutput(global_step=1100, training_loss=0.212534950429743, metrics={'train_runtime': 724.5874, 'train_samples_per_second': 16.561, 'train_steps_per_second': 2.07, 'total_flos': 2315418864844800.0, 'train_loss': 0.212534950429743, 'epoch': 2.2})" + ] + }, + "metadata": {}, + "execution_count": 110 + } + ], + "source": [ + "trainer.train()" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": { + "id": "1FQxrdS9sGXZ", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 89 + }, + "outputId": "24d69727-de29-431a-85b8-f7a31c394c39" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "***** Running Prediction *****\n", + " Num examples = 1000\n", + " Batch size = 8\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [125/125 00:19]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "result = trainer.predict(val_dataset)" + ] + }, + { + "cell_type": "code", + "source": [ + "print(result.metrics)" + ], + "metadata": { + "id": "9QtUEeoVxJkt", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "077812cc-3848-4b56-ac52-a9368920819b" + }, + "execution_count": 112, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'test_loss': 0.21706973016262054, 'test_accuracy': 0.83, 'test_precision': 0.83, 'test_recall': 0.83, 'test_f1': 0.83, 'test_runtime': 19.3166, 'test_samples_per_second': 51.769, 'test_steps_per_second': 6.471}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "filename = 'model_encoder.pkl'\n", + "trainer.save_model(filename)" + ], + "metadata": { + "id": "R3yrxs0ANvEQ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5ef8e1be-f0d5-4d76-a94f-f3eb29317a92" + }, + "execution_count": 113, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Saving model checkpoint to model_encoder.pkl\n", + "Configuration saved in model_encoder.pkl/config.json\n", + "Model weights saved in model_encoder.pkl/pytorch_model.bin\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z6ja8jhrmzwI" + }, + "source": [ + "# Model typu decoder" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": { + "id": "tUf06zqBAwXG", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "df99ab7a-ea9b-4e7c-b121-3cfbbafb7347" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n", + "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.47)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n", + "Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.5)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n" + ] + } + ], + "source": [ + "!pip install transformers" + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "id": "wble-rL7Q0Mk", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "76cbc0c5-ec80-4bb3-a26c-dbe3d57b857e" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "HBw75r5XBoui" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n", + "import torch\n", + "from transformers import TrainingArguments, Trainer\n", + "from transformers import BertTokenizer, BertForSequenceClassification\n", + "from transformers import EarlyStoppingCallback\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "AFgWRwlimzwJ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "2535ca11-60c0-4c34-f377-2a6ae2c7e18c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "loading file https://huggingface.co/allenai/longformer-scico/resolve/main/vocab.json from cache at /root/.cache/huggingface/transformers/1ee5ff92bf5d5e992fcf9006e19b6a4ad35d7c8564ef75f4d79a1ed2153273ff.bfdcc444ff249bca1a95ca170ec350b442f81804d7df3a95a2252217574121d7\n", + "loading file https://huggingface.co/allenai/longformer-scico/resolve/main/merges.txt from cache at /root/.cache/huggingface/transformers/0bc7fa46278c9aeb0db119eeac69668e732999ecf7e70938f7fabc0c50da0ed6.f5b91da9e34259b8f4d88dbc97c740667a0e8430b96314460cdb04e86d4fc435\n", + "loading file https://huggingface.co/allenai/longformer-scico/resolve/main/added_tokens.json from cache at /root/.cache/huggingface/transformers/3f461190d4c3e4866b53ee0eb0cc229b7868d365099be2f8e40def2f56f64bd1.b2dabb9d6f1c7ea55d3c9c1c2037f316794ad095778dd06ae6a225cc74100b76\n", + "loading file https://huggingface.co/allenai/longformer-scico/resolve/main/special_tokens_map.json from cache at /root/.cache/huggingface/transformers/d599b9f7e2161f0d2e3b9c8fd9cebef8b07c938f69b08a0a42e78c584f1b4b1e.a11ebb04664c067c8fe5ef8f8068b0f721263414a26058692f7b2e4ba2a1b342\n", + "loading file https://huggingface.co/allenai/longformer-scico/resolve/main/tokenizer_config.json from cache at /root/.cache/huggingface/transformers/da65f1f02a542899b2b4e34dbc660a4afcad000d51ea419fc5fd6a227a122f5e.3f75ee48edc5dac7e53863302122c4a3cee3a14a708eca842a8f62714c185ca5\n", + "loading file https://huggingface.co/allenai/longformer-scico/resolve/main/tokenizer.json from cache at None\n", + "loading configuration file https://huggingface.co/allenai/longformer-scico/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/7e93cc5a6217edd672fdad60f054eec31e6a815697de5adae6b921e1f75836a3.6890559d1ffa3ad48d00eee0ae70669ec630881b293df48322fe4c28c7088c35\n", + "Model config LongformerConfig {\n", + " \"_name_or_path\": \"allenai/longformer-scico\",\n", + " \"architectures\": [\n", + " \"LongformerForSequenceClassification\"\n", + " ],\n", + " \"attention_mode\": \"longformer\",\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"attention_window\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512\n", + " ],\n", + " \"bos_token_id\": 0,\n", + " \"classifier_dropout\": null,\n", + " \"eos_token_id\": 2,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": \"not related\",\n", + " \"1\": \"coref\",\n", + " \"2\": \"parent\",\n", + " \"3\": \"child\"\n", + " },\n", + " \"ignore_attention_mask\": false,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"child\": \"3\",\n", + " \"coref\": \"1\",\n", + " \"not related\": \"0\",\n", + " \"parent\": \"2\"\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 4098,\n", + " \"model_type\": \"longformer\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"sep_token_id\": 2,\n", + " \"transformers_version\": \"4.16.2\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50267\n", + "}\n", + "\n", + "Adding to the vocabulary\n", + "Adding to the vocabulary\n", + "loading configuration file https://huggingface.co/allenai/longformer-scico/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/7e93cc5a6217edd672fdad60f054eec31e6a815697de5adae6b921e1f75836a3.6890559d1ffa3ad48d00eee0ae70669ec630881b293df48322fe4c28c7088c35\n", + "Model config LongformerConfig {\n", + " \"_name_or_path\": \"allenai/longformer-base-4096\",\n", + " \"architectures\": [\n", + " \"LongformerForSequenceClassification\"\n", + " ],\n", + " \"attention_mode\": \"longformer\",\n", + " \"attention_probs_dropout_prob\": 0.1,\n", + " \"attention_window\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512\n", + " ],\n", + " \"bos_token_id\": 0,\n", + " \"classifier_dropout\": null,\n", + " \"eos_token_id\": 2,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout_prob\": 0.1,\n", + " \"hidden_size\": 768,\n", + " \"id2label\": {\n", + " \"0\": \"not related\",\n", + " \"1\": \"coref\",\n", + " \"2\": \"parent\",\n", + " \"3\": \"child\"\n", + " },\n", + " \"ignore_attention_mask\": false,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 3072,\n", + " \"label2id\": {\n", + " \"child\": \"3\",\n", + " \"coref\": \"1\",\n", + " \"not related\": \"0\",\n", + " \"parent\": \"2\"\n", + " },\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"max_position_embeddings\": 4098,\n", + " \"model_type\": \"longformer\",\n", + " \"num_attention_heads\": 12,\n", + " \"num_hidden_layers\": 12,\n", + " \"pad_token_id\": 1,\n", + " \"position_embedding_type\": \"absolute\",\n", + " \"problem_type\": \"multi_label_classification\",\n", + " \"sep_token_id\": 2,\n", + " \"transformers_version\": \"4.16.2\",\n", + " \"type_vocab_size\": 1,\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 50267\n", + "}\n", + "\n", + "loading weights file https://huggingface.co/allenai/longformer-scico/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/33709a0b0a44102dd29583428fe5253bf07cbd1ed163757382d471017620ad4d.6fd6d3de002d054747c1a5eb1e2b33e56924ad6db478547c9cf616d11dd48609\n", + "All model checkpoint weights were used when initializing LongformerForSequenceClassification.\n", + "\n", + "All the weights of LongformerForSequenceClassification were initialized from the model checkpoint at allenai/longformer-scico.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use LongformerForSequenceClassification for predictions without further training.\n", + "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n", + "\n", + "\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + "Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 31753: field larger than field limit (131072)\n", + "Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 275624: field larger than field limit (131072)\n", + "Skipping line 302668: field larger than field limit (131072)\n", + "Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 384761: field larger than field limit (131072)\n", + "Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 401260: field larger than field limit (131072)\n", + "Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 465419: field larger than field limit (131072)\n", + "Skipping line 466152: field larger than field limit (131072)\n", + "Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 529874: field larger than field limit (131072)\n", + "Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 589855: field larger than field limit (131072)\n", + "Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 632882: field larger than field limit (131072)\n", + "Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 658667: field larger than field limit (131072)\n" + ] + } + ], + "source": [ + "from transformers import LongformerTokenizer, LongformerForSequenceClassification\n", + "\n", + "model_name = \"allenai/longformer-scico\"\n", + "tokenizer = LongformerTokenizer.from_pretrained(model_name)\n", + "model = LongformerForSequenceClassification.from_pretrained(model_name, problem_type=\"multi_label_classification\")\n", + "\n", + "\"\"\"\n", + "1 - 22 -> 1 klasa\n", + "23 - 31 -> 2 klasa\n", + "32 - 39 -> 3 klasa \n", + "40 - 48 -> 4 klasa\n", + "\"\"\"\n", + "\n", + "def mapAgeToClass2(value: pd.DataFrame):\n", + " if(value['age'] <=22):\n", + " return [1.0,0.0,0.0,0.0]\n", + " elif(value['age'] > 22 and value['age'] <= 31):\n", + " return [0.0,1.0,0.0,0.0]\n", + " elif(value['age'] > 31 and value['age'] <= 39):\n", + " return [0.0,0.0,1.0,0.0]\n", + " else:\n", + " return [0.0,0.0,0.0,1.0]\n", + "\n", + "data_path = 'drive/MyDrive/blogtext.csv'\n", + "\n", + "data = pd.read_csv(data_path, error_bad_lines=False, engine='python')\n", + "data = data[:data_amount]\n", + "data['label'] = data.apply(lambda row: mapAgeToClass2(row), axis=1)\n", + "\n", + "\n", + "X = list(data['text'])\n", + "Y = list(data['label'])\n", + "if (torch.cuda.is_available()):\n", + " device = \"cuda:0\"\n", + " torch.cuda.empty_cache()\n", + "\n", + "\n", + "X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2)\n", + "\n", + "X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=1024)\n", + "X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=1024)\n", + "\n", + "class Dataset(torch.utils.data.Dataset):\n", + " def __init__(self, encodings, labels=None):\n", + " self.encodings = encodings\n", + " self.labels = labels\n", + "\n", + " def __getitem__(self, idx):\n", + " item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n", + " if self.labels:\n", + " item[\"labels\"] = torch.tensor(self.labels[idx])\n", + " return item\n", + "\n", + " def __len__(self):\n", + " return len(self.encodings[\"input_ids\"])\n", + "\n", + "train_dataset = Dataset(X_train_tokenized, y_train)\n", + "val_dataset = Dataset(X_val_tokenized, y_val)\n", + "\n", + "def compute_metrics(p):\n", + " pred, labels = p\n", + " pred = np.argmax(pred, axis=1)\n", + " labels = np.argmax(labels, axis=1)\n", + "\n", + " accuracy = accuracy_score(y_true=labels, y_pred=pred)\n", + " recall = recall_score(y_true=labels, y_pred=pred, average='micro')\n", + " precision = precision_score(y_true=labels, y_pred=pred, average='micro')\n", + " f1 = f1_score(y_true=labels, y_pred=pred, average='micro')\n", + "\n", + " return {\"accuracy\": accuracy, \"precision\": precision, \"recall\": recall, \"f1\": f1}\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "b-3I70XlmzwK", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "845a39fe-7c42-41bd-e84c-469e882006ae" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "PyTorch: setting up devices\n", + "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n" + ] + } + ], + "source": [ + "args = TrainingArguments(\n", + " output_dir=\"output\",\n", + " evaluation_strategy=\"steps\",\n", + " eval_steps=500,\n", + " per_device_train_batch_size=1,\n", + " per_device_eval_batch_size=1,\n", + " num_train_epochs=3,\n", + " seed=0,\n", + " load_best_model_at_end=True\n", + ")\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=args,\n", + " train_dataset=train_dataset,\n", + " eval_dataset=val_dataset,\n", + " compute_metrics=compute_metrics,\n", + " callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "4lvaWP9RmzwK", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "6071f1b3-5266-4ab6-ea1f-be1acd73cf01" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " FutureWarning,\n", + "***** Running training *****\n", + " Num examples = 4000\n", + " Num Epochs = 3\n", + " Instantaneous batch size per device = 1\n", + " Total train batch size (w. parallel, distributed & accumulation) = 1\n", + " Gradient Accumulation steps = 1\n", + " Total optimization steps = 12000\n", + "Initializing global attention on CLS token...\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [ 2000/12000 18:08 < 1:30:47, 1.84 it/s, Epoch 0/3]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation LossAccuracyPrecisionRecallF1
5000.5907000.5259440.6250000.6250000.6250000.625000
10000.5935000.6252580.6250000.6250000.6250000.625000
15000.6035000.6352950.6250000.6250000.6250000.625000
20000.6184000.5881480.6250000.6250000.6250000.625000

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.\u001b[0m\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Saving model checkpoint to output/checkpoint-500\n", + "Configuration saved in output/checkpoint-500/config.json\n", + "Model weights saved in output/checkpoint-500/pytorch_model.bin\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 1\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Saving model checkpoint to output/checkpoint-1000\n", + "Configuration saved in output/checkpoint-1000/config.json\n", + "Model weights saved in output/checkpoint-1000/pytorch_model.bin\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 1\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Saving model checkpoint to output/checkpoint-1500\n", + "Configuration saved in output/checkpoint-1500/config.json\n", + "Model weights saved in output/checkpoint-1500/pytorch_model.bin\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 1\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Saving model checkpoint to output/checkpoint-2000\n", + "Configuration saved in output/checkpoint-2000/config.json\n", + "Model weights saved in output/checkpoint-2000/pytorch_model.bin\n", + "\n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "Loading best model from output/checkpoint-500 (score: 0.5259436964988708).\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "TrainOutput(global_step=2000, training_loss=0.6015312805175781, metrics={'train_runtime': 1088.7725, 'train_samples_per_second': 11.022, 'train_steps_per_second': 11.022, 'total_flos': 1313722122240000.0, 'train_loss': 0.6015312805175781, 'epoch': 0.5})" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "trainer.train()" + ] + }, + { + "cell_type": "code", + "source": [ + "result = trainer.predict(val_dataset)" + ], + "metadata": { + "id": "YTeHJ_c6I2iy", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "d0ea9cfe-c6c1-46d4-e606-5dcaa8a6e2ed" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "***** Running Prediction *****\n", + " Num examples = 1000\n", + " Batch size = 1\n", + "Initializing global attention on CLS token...\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [1000/1000 03:28]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n", + "Initializing global attention on CLS token...\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(result.metrics)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "RWdJsGGYz9-p", + "outputId": "f82c0567-8ab0-4568-90c7-6545d73e6d99" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'test_loss': 0.5259436964988708, 'test_accuracy': 0.625, 'test_precision': 0.625, 'test_recall': 0.625, 'test_f1': 0.625, 'test_runtime': 96.4013, 'test_samples_per_second': 10.373, 'test_steps_per_second': 10.373}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "filename='model_decoder'\n", + "trainer.save_model(filename)" + ], + "metadata": { + "id": "SSAnGmAXZGsT", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "3db06c6b-2efd-44f6-a3ff-481c47f36156" + }, + "execution_count": 15, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Saving model checkpoint to model_decoder\n", + "Configuration saved in model_decoder/config.json\n", + "Model weights saved in model_decoder/pytorch_model.bin\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ESiFpWJYzcgC" + }, + "source": [ + "# Model typu encoder-decoder" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "WYz-wVPoz_tJ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e25586d5-4b1c-4588-efde-1da9f0a18cfd" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: sentencepiece==0.1.91 in /usr/local/lib/python3.7/dist-packages (0.1.91)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", + "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n", + "Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.5)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n", + "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.47)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n", + "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n", + "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n", + "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n", + "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n" + ] + } + ], + "source": [ + "!pip install sentencepiece==0.1.91\n", + "!pip install transformers" + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "id": "phhvsUnGYC-o" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "Ylv54tLmBX6a" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n", + "import torch\n", + "from transformers import TrainingArguments, Trainer\n", + "from transformers import BertTokenizer, BertForSequenceClassification\n", + "from transformers import EarlyStoppingCallback\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "_-KcWxdqzgF2", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "44fe6f02-c75b-47d1-f856-866f2fd8c51f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "loading file https://huggingface.co/t5-small/resolve/main/spiece.model from cache at /root/.cache/huggingface/transformers/65fc04e21f45f61430aea0c4fedffac16a4d20d78b8e6601d8d996ebefefecd2.3b69006860e7b5d0a63ffdddc01ddcd6b7c318a6f4fd793596552c741734c62d\n", + "loading file https://huggingface.co/t5-small/resolve/main/added_tokens.json from cache at None\n", + "loading file https://huggingface.co/t5-small/resolve/main/special_tokens_map.json from cache at None\n", + "loading file https://huggingface.co/t5-small/resolve/main/tokenizer_config.json from cache at None\n", + "loading file https://huggingface.co/t5-small/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/06779097c78e12f47ef67ecb728810c2ae757ee0a9efe9390c6419783d99382d.8627f1bd5d270a9fd2e5a51c8bec3223896587cc3cfe13edeabb0992ab43c529\n", + "loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n", + "Model config T5Config {\n", + " \"_name_or_path\": \"t5-small\",\n", + " \"architectures\": [\n", + " \"T5WithLMHeadModel\"\n", + " ],\n", + " \"d_ff\": 2048,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 512,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"feed_forward_proj\": \"relu\",\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_decoder_layers\": 6,\n", + " \"num_heads\": 8,\n", + " \"num_layers\": 6,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to Romanian: \"\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.16.2\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n", + "loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n", + "Model config T5Config {\n", + " \"architectures\": [\n", + " \"T5WithLMHeadModel\"\n", + " ],\n", + " \"d_ff\": 2048,\n", + " \"d_kv\": 64,\n", + " \"d_model\": 512,\n", + " \"decoder_start_token_id\": 0,\n", + " \"dropout_rate\": 0.1,\n", + " \"eos_token_id\": 1,\n", + " \"feed_forward_proj\": \"relu\",\n", + " \"initializer_factor\": 1.0,\n", + " \"is_encoder_decoder\": true,\n", + " \"layer_norm_epsilon\": 1e-06,\n", + " \"model_type\": \"t5\",\n", + " \"n_positions\": 512,\n", + " \"num_decoder_layers\": 6,\n", + " \"num_heads\": 8,\n", + " \"num_layers\": 6,\n", + " \"output_past\": true,\n", + " \"pad_token_id\": 0,\n", + " \"relative_attention_num_buckets\": 32,\n", + " \"task_specific_params\": {\n", + " \"summarization\": {\n", + " \"early_stopping\": true,\n", + " \"length_penalty\": 2.0,\n", + " \"max_length\": 200,\n", + " \"min_length\": 30,\n", + " \"no_repeat_ngram_size\": 3,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"summarize: \"\n", + " },\n", + " \"translation_en_to_de\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to German: \"\n", + " },\n", + " \"translation_en_to_fr\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to French: \"\n", + " },\n", + " \"translation_en_to_ro\": {\n", + " \"early_stopping\": true,\n", + " \"max_length\": 300,\n", + " \"num_beams\": 4,\n", + " \"prefix\": \"translate English to Romanian: \"\n", + " }\n", + " },\n", + " \"transformers_version\": \"4.16.2\",\n", + " \"use_cache\": true,\n", + " \"vocab_size\": 32128\n", + "}\n", + "\n", + "loading weights file https://huggingface.co/t5-small/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/fee5a3a0ae379232608b6eed45d2d7a0d2966b9683728838412caccc41b4b0ed.ddacdc89ec88482db20c676f0861a336f3d0409f94748c209847b49529d73885\n", + "All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n", + "\n", + "All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at t5-small.\n", + "If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n", + "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n", + "\n", + "\n", + " exec(code_obj, self.user_global_ns, self.user_ns)\n", + "Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 31753: field larger than field limit (131072)\n", + "Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 275624: field larger than field limit (131072)\n", + "Skipping line 302668: field larger than field limit (131072)\n", + "Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 384761: field larger than field limit (131072)\n", + "Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 401260: field larger than field limit (131072)\n", + "Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 465419: field larger than field limit (131072)\n", + "Skipping line 466152: field larger than field limit (131072)\n", + "Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 529874: field larger than field limit (131072)\n", + "Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 589855: field larger than field limit (131072)\n", + "Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 632882: field larger than field limit (131072)\n", + "Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n", + "Skipping line 658667: field larger than field limit (131072)\n" + ] + } + ], + "source": [ + "from transformers import T5Tokenizer, T5ForConditionalGeneration\n", + "from transformers import EvalPrediction\n", + "\n", + "model_name = \"t5-small\"\n", + "tokenizer = T5Tokenizer.from_pretrained(model_name)\n", + "model = T5ForConditionalGeneration.from_pretrained(model_name)\n", + "\n", + "\"\"\"\n", + "1 - 22 -> 1 klasa\n", + "23 - 31 -> 2 klasa\n", + "32 - 39 -> 3 klasa \n", + "40 - 48 -> 4 klasa\n", + "\"\"\"\n", + "\n", + "def mapAgeToClass2(value: pd.DataFrame):\n", + " if(value['age'] <=22):\n", + " # return [1,0,0,0]\n", + " return 'class1'\n", + " elif(value['age'] > 22 and value['age'] <= 31):\n", + " # return [0,1,0,0]\n", + " return 'class2'\n", + " elif(value['age'] > 31 and value['age'] <= 39):\n", + " # return [0,0,1,0]\n", + " return 'class3'\n", + " else:\n", + " # return [0,0,0,1]\n", + " return 'class4'\n", + "\n", + "data_path = 'drive/MyDrive/blogtext.csv'\n", + "\n", + "data = pd.read_csv(data_path, error_bad_lines=False, engine='python')\n", + "data = data[:data_amount]\n", + "data['label'] = data.apply(lambda row: mapAgeToClass2(row), axis=1)\n", + "\n", + "\n", + "X = list(data['text'])\n", + "Y = list(data['label'])\n", + "if (torch.cuda.is_available()):\n", + " device = \"cuda:0\"\n", + " torch.cuda.empty_cache()\n", + "\n", + "\n", + "X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2)\n", + "\n", + "X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=1024)\n", + "X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=1024)\n", + "\n", + "class Dataset(torch.utils.data.Dataset):\n", + " def __init__(self, encodings, labels=None):\n", + " self.encodings = encodings\n", + " self.labels = labels\n", + "\n", + " def __getitem__(self, idx):\n", + " item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n", + " if self.labels:\n", + " item[\"labels\"] = torch.tensor(tokenizer(self.labels[idx])['input_ids'])\n", + " return item\n", + "\n", + " def __len__(self):\n", + " return len(self.encodings[\"input_ids\"])\n", + "\n", + "train_dataset = Dataset(X_train_tokenized, y_train)\n", + "val_dataset = Dataset(X_val_tokenized, y_val)\n", + "\n", + "def compute_metrics(pred):\n", + " labels_ids = pred.label_ids\n", + " pred_ids = pred.predictions\n", + "\n", + " pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)\n", + " label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)\n", + "\n", + " accuracy = sum([int(pred_str[i] == label_str[i]) for i in range(len(pred_str))]) / len(pred_str)\n", + "\n", + " return {\"accuracy\": accuracy}\n", + "\n" + ] + }, + { + "cell_type": "code", + "source": [ + "from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer\n", + "\n", + "args = Seq2SeqTrainingArguments(\n", + " output_dir=\"output\",\n", + " evaluation_strategy=\"steps\",\n", + " eval_steps=50,\n", + " per_device_train_batch_size=8,\n", + " per_device_eval_batch_size=8,\n", + " num_train_epochs=3,\n", + " seed=0,\n", + " load_best_model_at_end=True,\n", + " predict_with_generate=True\n", + ")\n", + "\n", + "trainer = Seq2SeqTrainer(\n", + " model=model,\n", + " args=args,\n", + " train_dataset=train_dataset,\n", + " eval_dataset=val_dataset,\n", + " compute_metrics=compute_metrics\n", + ")" + ], + "metadata": { + "id": "XayaHmAMgI1x", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4c32a772-96bc-4a43-b406-110c5f311932" + }, + "execution_count": 19, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "PyTorch: setting up devices\n", + "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "8nVY24TCz1Mi", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "b4542048-d208-463a-b088-df9645f8b92d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " FutureWarning,\n", + "***** Running training *****\n", + " Num examples = 4000\n", + " Num Epochs = 3\n", + " Instantaneous batch size per device = 8\n", + " Total train batch size (w. parallel, distributed & accumulation) = 8\n", + " Gradient Accumulation steps = 1\n", + " Total optimization steps = 1500\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [1500/1500 32:13, Epoch 3/3]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation LossAccuracy
50No log2.8985110.000000
100No log0.4374330.601000
150No log0.3015480.645000
200No log0.2788920.668000
250No log0.2702450.686000
300No log0.2860850.663000
350No log0.2620660.708000
400No log0.2572510.697000
450No log0.2528480.710000
5001.0576000.2485040.701000
5501.0576000.2515630.721000
6001.0576000.2395080.731000
6501.0576000.2354620.738000
7001.0576000.2461520.734000
7501.0576000.2374330.733000
8001.0576000.2341270.752000
8501.0576000.2247850.760000
9001.0576000.2226180.747000
9501.0576000.2171100.770000
10000.2666000.2143050.765000
10500.2666000.2138130.771000
11000.2666000.2122080.774000
11500.2666000.2110070.772000
12000.2666000.2104510.768000
12500.2666000.2104600.768000
13000.2666000.2145610.769000
13500.2666000.2104500.767000
14000.2666000.2092760.767000
14500.2666000.2100690.769000
15000.2447000.2100560.766000

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "Saving model checkpoint to output/checkpoint-500\n", + "Configuration saved in output/checkpoint-500/config.json\n", + "Model weights saved in output/checkpoint-500/pytorch_model.bin\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "Saving model checkpoint to output/checkpoint-1000\n", + "Configuration saved in output/checkpoint-1000/config.json\n", + "Model weights saved in output/checkpoint-1000/pytorch_model.bin\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "***** Running Evaluation *****\n", + " Num examples = 1000\n", + " Batch size = 8\n", + "Saving model checkpoint to output/checkpoint-1500\n", + "Configuration saved in output/checkpoint-1500/config.json\n", + "Model weights saved in output/checkpoint-1500/pytorch_model.bin\n", + "\n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n", + "Loading best model from output/checkpoint-1500 (score: 0.2100560963153839).\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "TrainOutput(global_step=1500, training_loss=0.5229549509684245, metrics={'train_runtime': 1934.1295, 'train_samples_per_second': 6.204, 'train_steps_per_second': 0.776, 'total_flos': 3248203235328000.0, 'train_loss': 0.5229549509684245, 'epoch': 3.0})" + ] + }, + "metadata": {}, + "execution_count": 20 + } + ], + "source": [ + "trainer.train()" + ] + }, + { + "cell_type": "code", + "source": [ + "result = trainer.predict(val_dataset)" + ], + "metadata": { + "id": "yBrHzXzhaKvk", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 89 + }, + "outputId": "92a6c3ea-4695-4dcf-fe71-9621fadc9906" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "***** Running Prediction *****\n", + " Num examples = 1000\n", + " Batch size = 8\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "

\n", + " \n", + " \n", + " [125/125 00:33]\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(result.metrics)" + ], + "metadata": { + "id": "nzm2vx86llKw", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6be67f3e-043c-423c-c81a-8686d59a656e" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'test_loss': 0.2100560963153839, 'test_accuracy': 0.766, 'test_runtime': 45.1374, 'test_samples_per_second': 22.155, 'test_steps_per_second': 2.769}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "filename='model_encoder_decoder'\n", + "trainer.save_model(filename)" + ], + "metadata": { + "id": "LWpjAH_YaL66", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e5c07d60-c441-4dc1-8ce0-66e815823a68" + }, + "execution_count": 23, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Saving model checkpoint to model_encoder_decoder\n", + "Configuration saved in model_encoder_decoder/config.json\n", + "Model weights saved in model_encoder_decoder/pytorch_model.bin\n" + ] + } + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "main.ipynb", + "provenance": [], + "machine_shape": "hm" + }, + "interpreter": { + "hash": "f4394274b6de412f99b9d08dfb473204abc12afd5637ebb20c9ad8dbd67e97a0" + }, + "kernelspec": { + "display_name": "Python 3.10.1 64-bit ('venv': venv)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" } - ], - "source": [ - "trainer.train()" - ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "raw_pred, _, _ = trainer.predict(val_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "y_pred = np.argmax(raw_pred, axis=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Model typu decoder" - ] - } - ], - "metadata": { - "interpreter": { - "hash": "f4394274b6de412f99b9d08dfb473204abc12afd5637ebb20c9ad8dbd67e97a0" - }, - "kernelspec": { - "display_name": "Python 3.10.1 64-bit ('venv': venv)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.12" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file