"
+ "cell_type": "code",
+ "source": [
+ "data_amount = 5000"
],
- "text/plain": [
- " id gender age topic sign date \\\n",
- "0 2059027 male 15 Student Leo 14,May,2004 \n",
- "1 2059027 male 15 Student Leo 13,May,2004 \n",
- "2 2059027 male 15 Student Leo 12,May,2004 \n",
- "3 2059027 male 15 Student Leo 12,May,2004 \n",
- "4 3581210 male 33 InvestmentBanking Aquarius 11,June,2004 \n",
- "\n",
- " text \n",
- "0 Info has been found (+/- 100 pages,... \n",
- "1 These are the team members: Drewe... \n",
- "2 In het kader van kernfusie op aarde... \n",
- "3 testing!!! testing!!! \n",
- "4 Thanks to Yahoo!'s Toolbar I can ... "
- ]
- },
- "execution_count": 2,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "data = pd.read_csv(\"data/blogtext.csv\")\n",
- "data = data[:100]\n",
- "data.head()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Model typu encoder (BertForSequenceClassification)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.weight']\n",
- "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n",
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
- ]
- }
- ],
- "source": [
- "model_name = 'bert-base-uncased'\n",
- "tokenizer = BertTokenizer.from_pretrained(model_name)\n",
- "model = BertForSequenceClassification.from_pretrained(model_name, problem_type=\"multi_label_classification\", num_labels=4)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 55,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAbqElEQVR4nO3df5RcZZ3n8feHhAShFQhgL0KUCMnOCeMsY5oEZpXpFgcadiTObJhN88OwA5tVJ3N29Lizcd2JJLp7Dq4L4x5wNLOwMIDpZOPqZLUdQEkfRpcfIcqvBhKaiJKIsNBELBFj4Lt/3CdOTVnVVd1V1dU8+bzOqdP3x/Pc+62bW5+6/VT1jSICMzPL1yGdLsDMzNrLQW9mljkHvZlZ5hz0ZmaZc9CbmWXOQW9mljkHvXWcpBFJvZ2uo5Mk/YGkpyWVJP12p+uxvDjora0kPSXpvRXLLpP07QPzEXFqRAzX2c5JkkLSzDaV2mmfBVZFRFdEfK9aAxV2SXp0imuz1zkHvRkwDd5A3gaM1GlzFvBm4O2STm9/SZYLB711XPlVv6TFku6X9JKkZyVdnZrdlX7uTcMbZ0o6RNJ/kvQDSc9J+htJR5Zt9wNp3QuS/qJiP1dK2izpFkkvAZelfd8taa+kZyRdK2lW2fZC0oclPSHpp5I+JelkSf831bupvH3Fc6xaq6TZkkrADOBBSU+Oc6hWAH8LDKXp8u3Pk3RXquubkq6TdEvZ+jNSnXslPXiwD5UdbBz0Nt18DvhcRLwJOBnYlJaflX4elYY37gYuS48+4O1AF3AtgKSFwOeBi4HjgSOBEyr2tRTYDBwF3Aq8CnwEOBY4Ezgb+HBFn3OBRcAZwJ8D64FLgLnAbwIDNZ5X1Voj4hcR0ZXa/LOIOLlaZ0mHA8tSnbcCyyveVL4E3AccA1wJXFrW9wTg68CngTnAx4AvSzquRq2WGQe9TYWvpivJvZL2UgRwLb8ETpF0bESUIuKecdpeDFwdEbsiogR8nCIAZ1KE4v+JiG9HxD5gDVB5Y6e7I+KrEfFaRPw8IrZHxD0RsT8ingK+CPxuRZ/PRMRLETECPALcnvb/E+AbQK0PUsertRF/CPwCuJ0itA8F/gWApLcCpwNrImJfRHwb2FLW9xJgKCKG0nO9A7gfOL/BfdvrnIPepsL7I+KoAw9+/Sq53OXAAuBxSdsk/f44bd8C/KBs/gfATKA7rXv6wIqIeBl4oaL/0+UzkhZI+pqkH6fhnP9CcXVf7tmy6Z9Xme+iuvFqbcQKYFN6E3oF+DL/MHzzFmAsPccDyp/b24ALK95s30Xxm44dBDr9AZTZPxIRTwADkg6huIrdLOkYfv1qHOBHFCF2wFuB/RTh+wzwTw+skPQGimGNf7S7ivm/Ar4HDETETyX9GcVvBq0wXq3jknQi8B5gsaR/mRYfDhwm6ViK5zpH0uFlYT+3bBNPAzdHxL9p8jnY65Sv6G1akXSJpOMi4jVgb1r8GvD/0s+3lzXfAHwkfRDZRXEFvjEi9lOMvb9P0u+ksewrAdXZ/RuBl4CSpN8APtSip1Wv1nouBXZSvHGdlh4LgN0Ub0o/oBiKuVLSLElnAu8r638LxbE4V9IMSYdJ6k1vIHYQcNDbdNMPjKRvonwOWJ7Gz18G/jPwnTT8cAZwA3AzxTdyvg+8AvwpQBpD/1NgkOKKtwQ8RzHOXcvHgIuAnwJ/DWxs4fOqWWsDVgCfj4gflz+AL/APwzcXU3yA/ALFh64bSc81Ip6m+OD5P1K8YT4N/Hv8+j9oyP/xiB0M0lX0XmB+RHy/w+W0naSNwOMR8clO12Kd53d0y5ak90k6XNIRFH95+jDwVGerag9Jp6fv9B8iqZ/iCv6rHS7LpgkHveVsKcWHoD8C5lMMA+X6K+w/AYYphqj+O/ChWrdSsIOPh27MzDLnK3ozs8xNu+/RH3vssXHSSSd1bP8/+9nPOOKIIzq2/3pcX3NcX3NcX3PaWd/2
7dufj4jqt7WIiGn1WLRoUXTS1q1bO7r/elxfc1xfc1xfc9pZH3B/1MhVD92YmWXOQW9mljkHvZlZ5hz0ZmaZc9CbmWXOQW9mljkHvZlZ5hz0ZmaZc9CbmWVu2t0Cwczy0dc3tfsbGIC1a6d2nxNRr76tW9uzX1/Rm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpa5hoJeUr+kHZJGJa2usv4sSd+VtF/Ssirr3yRpt6RrW1G0mZk1rm7QS5oBXAecBywEBiQtrGj2Q+Ay4Es1NvMp4K7Jl2lmZpPVyBX9YmA0InZFxD5gEFha3iAinoqIh4DXKjtLWgR0A7e3oF4zM5sgFf/V4DgNiqGY/oi4Is1fCiyJiFVV2t4IfC0iNqf5Q4A7gUuA9wI9NfqtBFYCdHd3LxocHGzmOTWlVCrR1dXVsf3X4/qa4/qaM9H6du5sYzFVzJlTYmxs+h6/evUtWDD5bff19W2PiJ5q69p9C4QPA0MRsVtSzUYRsR5YD9DT0xO9vb1tLqu24eFhOrn/elxfc1xfcyZa31TfjmBgYJgNG3qndqcTUK++dt0CoZGg3wPMLZs/MS1rxJnAuyV9GOgCZkkqRcSvfaBrZmbt0UjQbwPmS5pHEfDLgYsa2XhEXHxgWtJlFEM3DnkzsylU98PYiNgPrAJuAx4DNkXEiKR1ki4AkHS6pN3AhcAXJY20s2gzM2tcQ2P0ETEEDFUsW1M2vY1iSGe8bdwI3DjhCs3MrCn+y1gzs8w56M3MMuegNzPLnIPezCxzDnozs8w56M3MMuegNzPLnIPezCxzDnozs8w56M3MMuegNzPLnIPezCxzDnozs8w56M3MMuegNzPLnIPezCxzDnozs8w56M3MMuegNzPLXENBL6lf0g5Jo5JWV1l/lqTvStovaVnZ8tMk3S1pRNJDkv5VK4s3M7P66ga9pBnAdcB5wEJgQNLCimY/BC4DvlSx/GXgAxFxKtAP/KWko5qs2czMJmBmA20WA6MRsQtA0iCwFHj0QIOIeCqte628Y0TsLJv+kaTngOOAvc0WbmZmjVFEjN+gGIrpj4gr0vylwJKIWFWl7Y3A1yJic5V1i4GbgFMj4rWKdSuBlQDd3d2LBgcHJ/dsWqBUKtHV1dWx/dfj+prj+poz0fp27qzfppXmzCkxNjZ9j1+9+hYsmPy2+/r6tkdET7V1jVzRN03S8cDNwIrKkAeIiPXAeoCenp7o7e2dirKqGh4eppP7r8f1Ncf1NWei9a1d275aqhkYGGbDht6p3ekE1Ktv69b27LeRD2P3AHPL5k9Myxoi6U3A14FPRMQ9EyvPzMya1UjQbwPmS5onaRawHNjSyMZT+68Af1NtOMfMzNqvbtBHxH5gFXAb8BiwKSJGJK2TdAGApNMl7QYuBL4oaSR1/yPgLOAySQ+kx2nteCJmZlZdQ2P0ETEEDFUsW1M2vY1iSKey3y3ALU3WaGZmTfBfxpqZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmGgp6Sf2SdkgalbS6yvqzJH1X0n5JyyrWrZD0RHqsaFXhZmbWmLpBL2kGcB1wHrAQGJC0sKLZD4HLgC9V9J0DfBJYAiwGPinp6ObLNjOzRjVyRb8YGI2IXRGxDxgElpY3iIinIuIh4LWKvucCd0TEWES8CNwB9LegbjMza5AiYvwGxVBMf0RckeYvBZZExKoqbW8EvhYRm9P8x4DDIuLTaf4vgJ9HxGcr+q0EVgJ0d3cvGhwcbPZ5TVqpVKKrq6tj+6/H9TXH9TVnovXt3NnGYqqYM6fE2Nj0PX716luwYPLb7uvr2x4RPdXWzZz8ZlsnItYD6wF6enqit7e3Y7UMDw/Tyf3X4/qa4/qaM9H61q5tXy3VDAwMs2FD79TudALq1bd1
a3v228jQzR5gbtn8iWlZI5rpa2ZmLdBI0G8D5kuaJ2kWsBzY0uD2bwPOkXR0+hD2nLTMzMymSN2gj4j9wCqKgH4M2BQRI5LWSboAQNLpknYDFwJflDSS+o4Bn6J4s9gGrEvLzMxsijQ0Rh8RQ8BQxbI1ZdPbKIZlqvW9AbihiRrNzKwJ/stYM7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy1xDQS+pX9IOSaOSVldZP1vSxrT+XkknpeWHSrpJ0sOSHpP08RbXb2ZmddQNekkzgOuA84CFwICkhRXNLgdejIhTgGuAq9LyC4HZEfEOYBHwbw+8CZiZ2dRo5Ip+MTAaEbsiYh8wCCytaLMUuClNbwbOliQggCMkzQTeAOwDXmpJ5WZm1hBFxPgNpGVAf0RckeYvBZZExKqyNo+kNrvT/JPAEuAnwM3A2cDhwEciYn2VfawEVgJ0d3cvGhwcbMFTm5xSqURXV1fH9l+P62uO62vOROvbubONxVQxZ06JsbHpe/zq1bdgweS33dfXtz0ieqqtmzn5zTZkMfAq8BbgaODvJX0zInaVN0rhvx6gp6cnent721xWbcPDw3Ry//W4vua4vuZMtL61a9tXSzUDA8Ns2NA7tTudgHr1bd3anv02MnSzB5hbNn9iWla1TRqmORJ4AbgI+LuI+GVEPAd8B6j6jmNmZu3RSNBvA+ZLmidpFrAc2FLRZguwIk0vA+6MYkzoh8B7ACQdAZwBPN6Kws3MrDF1gz4i9gOrgNuAx4BNETEiaZ2kC1Kz64FjJI0CHwUOfAXzOqBL0gjFG8b/jIiHWv0kzMystobG6CNiCBiqWLambPoViq9SVvYrVVtuZmZTx38Za2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplz0JuZZc5Bb2aWOQe9mVnmHPRmZplrKOgl9UvaIWlU0uoq62dL2pjW3yvppLJ1vyXpbkkjkh6WdFgL6zczszrqBr2kGcB1wHnAQmBA0sKKZpcDL0bEKcA1wFWp70zgFuCDEXEq0Av8smXVm5lZXY1c0S8GRiNiV0TsAwaBpRVtlgI3penNwNmSBJwDPBQRDwJExAsR8WprSjczs0YoIsZvIC0D+iPiijR/KbAkIlaVtXkktdmd5p8ElgCXAIuANwPHAYMR8Zkq+1gJrATo7u5eNDg42IKnNjmlUomurq6O7b8e19cc19ecida3c2cbi6lizpwSY2PT9/jVq2/Bgslvu6+vb3tE9FRbN3Pym23ITOBdwOnAy8C3JG2PiG+VN4qI9cB6gJ6enujt7W1zWbUNDw/Tyf3X4/qa4/qaM9H61q5tXy3VDAwMs2FD79TudALq1bd1a3v220jQ7wHmls2fmJZVa7M7jcsfCbwA7AbuiojnASQNAe8EvkWb9PU1139gYOpPzolodX3tOrHMbPpoZIx+GzBf0jxJs4DlwJaKNluAFWl6GXBnFGNCtwHvkHR4egP4XeDR1pRuZmaNqHtFHxH7Ja2iCO0ZwA0RMSJpHXB/RGwBrgduljQKjFG8GRARL0q6muLNIoChiPh6m56LmZlV0dAYfUQMAUMVy9aUTb8CXFij7y0UX7E0M7MO8F/GmpllzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYc9GZmmXPQm5llzkFvZpY5B72ZWeYaCnpJ/ZJ2SBqVtLrK+tmSNqb190o6qWL9WyWVJH2sRXWbmVmD6ga9pBnAdcB5wEJgQNLCimaXAy9GxCnANcBVFeuvBr7RfLlmZjZRjVzR
LwZGI2JXROwDBoGlFW2WAjel6c3A2ZIEIOn9wPeBkZZUbGZmE9JI0J8APF02vzstq9omIvYDPwGOkdQF/AdgbfOlmpnZZCgixm8gLQP6I+KKNH8psCQiVpW1eSS12Z3mnwSWAKuB+yJik6QrgVJEfLbKPlYCKwG6u7sXDQ4OTvoJ7dw56a4AzJlTYmysq7mNtFGr61uwoGWbAqBUKtHVNX2Pn+trzkTra/b1OFGv99dvM6/Hvr6+7RHRU23dzAb67wHmls2fmJZVa7Nb0kzgSOAFirBfJukzwFHAa5JeiYhryztHxHpgPUBPT0/09vY2UFZ1a5v83WFgYJgNGya//3ZrdX1bt7ZsUwAMDw/TzL9fu7m+5ky0vmZfjxP1en/9tvr1eEAjQb8NmC9pHkWgLwcuqmizBVgB3A0sA+6M4leFdx9oUHZFfy1mZjZl6gZ9ROyXtAq4DZgB3BARI5LWAfdHxBbgeuBmSaPAGMWbgZmZTQONXNETEUPAUMWyNWXTrwAX1tnGlZOoz8zMmuS/jDUzy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLXUNBL6pe0Q9KopNVV1s+WtDGtv1fSSWn570naLunh9PM9La7fzMzqqBv0kmYA1wHnAQuBAUkLK5pdDrwYEacA1wBXpeXPA++LiHcAK4CbW1W4mZk1ppEr+sXAaETsioh9wCCwtKLNUuCmNL0ZOFuSIuJ7EfGjtHwEeIOk2a0o3MzMGqOIGL+BtAzoj4gr0vylwJKIWFXW5pHUZneafzK1eb5iOx+MiPdW2cdKYCVAd3f3osHBwUk/oZ07J90VgDlzSoyNdTW3kTZqdX0LFrRsUwCUSiW6uqbv8XN9zZlofc2+Hifq9f76beb12NfXtz0ieqqtmzn5zTZO0qkUwznnVFsfEeuB9QA9PT3R29s76X2tXTvprgAMDAyzYcPk999ura5v69aWbQqA4eFhmvn3azfX15yJ1tfs63GiXu+v31a/Hg9oZOhmDzC3bP7EtKxqG0kzgSOBF9L8icBXgA9ExJPNFmxmZhPTSNBvA+ZLmidpFrAc2FLRZgvFh60Ay4A7IyIkHQV8HVgdEd9pUc1mZjYBdYM+IvYDq4DbgMeATRExImmdpAtSs+uBYySNAh8FDnwFcxVwCrBG0gPp8eaWPwszM6upoTH6iBgChiqWrSmbfgW4sEq/TwOfbrJGMzNrgv8y1swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy9yU/OfgZrno62vv9gcGpv4/1J6I6V6fVecrejOzzDnozcwy11DQS+qXtEPSqKTVVdbPlrQxrb9X0kll6z6elu+QdG4LazczswbUDXpJM4DrgPOAhcCApIUVzS4HXoyIU4BrgKtS34XAcuBUoB/4fNqemZlNkUau6BcDoxGxKyL2AYPA0oo2S4Gb0vRm4GxJSssHI+IXEfF9YDRtz8zMpkgj37o5AXi6bH43sKRWm4jYL+knwDFp+T0VfU+o3IGklcDKNFuStKOh6ttgeJhjgec7tf96Wl2f1Kot/cq0Pn5M8/oOtvOv1V7v9TX5enxbrRXT4uuVEbEeWN/pOgAk3R8RPZ2uoxbX1xzX1xzX15xO1dfI0M0eYG7Z/IlpWdU2kmYCRwIvNNjXzMzaqJGg3wbMlzRP0iyKD1e3VLTZAqxI08uAOyMi0vLl6Vs584D5wH2tKd3MzBpRd+gmjbmvAm4DZgA3RMSIpHXA/RGxBbgeuFnSKDBG8WZAarcJeBTYD/xJRLzapufSKtNiCGkcrq85rq85rq85HalPxYW3mZnlyn8Za2aWOQe9mVnmDpqg
l3SDpOckPVK27EpJeyQ9kB7n1+g77i0g2ljfxrLanpL0QI2+T0l6OLW7vw21zZW0VdKjkkYk/bu0fI6kOyQ9kX4eXaP/itTmCUkrqrVpU33/VdLjkh6S9BVJR9Xo39bjV6fGjp+D49Q2Lc6/tI/DJN0n6cFU49q0fF667cpoqndWjf5tvRXLOPXdmvb5SHqNH1qj/6tlx7ryyy7Ni4iD4gGcBbwTeKRs2ZXAx+r0mwE8CbwdmAU8CCycivoq1v83YE2NdU8Bx7bx2B0PvDNNvxHYSXE7jM8Aq9Py1cBVVfrOAXaln0en6aOnqL5zgJlp+VXV6puK41enxo6fg7Vqmy7nX9qHgK40fShwL3AGsAlYnpZ/AfhQlb4L0zGbDcxLx3LGFNV3flonYEO1+lKfUjuP30FzRR8Rd1F8I2iiGrkFRNPGq0+SgD+iOFGmXEQ8ExHfTdM/BR6j+Avn8ltf3AS8v0r3c4E7ImIsIl4E7qC471Hb64uI2yNif2p2D8XfcXTEOMewEW09B+vV1unzL9UVEVFKs4emRwDvobjtCtQ+B9t+K5Za9UXEUFoXFF8t78g5eNAE/ThWpV/tb6gx9FDtFhCNvkBb5d3AsxHxRI31AdwuabuK20m0jYo7k/42xRVLd0Q8k1b9GOiu0mVKj19FfeX+GPhGjW5Tdvygao3T5hyscfymxfknaUYaPnqO4oLhSWBv2Zt5reMyJcevsr6IuLds3aHApcDf1eh+mKT7Jd0j6f2tru1gD/q/Ak4GTgOeofj1dDoaYPyrqXdFxDsp7jD6J5LOakcRkrqALwN/FhEvla9LVywd/a5urfokfYLi7zhurdF1So5fjRqnzTk4zr/vtDj/IuLViDiN4qp4MfAb7djPZFXWJ+k3y1Z/HrgrIv6+Rve3RXFrhIuAv5R0citrO6iDPiKeTf84rwF/TfVf5zp6GwcVt5T4Q2BjrTYRsSf9fA74Cm24Q2i6IvkycGtE/O+0+FlJx6f1x1NcyVSakuNXoz4kXQb8PnBxejP6NVNx/GrVOF3OwXGO37Q4/yr2txfYCpwJHJVqhNrHZUpfw2X19QNI+iRwHPDRcfocOIa7gGGK36pa5qAO+gMhlfwB8EiVZo3cAqKd3gs8HhG7q62UdISkNx6YpvgAstrzmLQ0Rns98FhEXF22qvzWFyuAv63S/TbgHElHp2GJc9KyttcnqR/4c+CCiHi5Rt+2H786NXb8HBzn3xemwfmXtn2c0remJL0B+D2KzxK2Utx2BWqfg22/FUuN+h6XdAXF51QD6c28Wt+jJc1O08cC/5zibgKt085PeqfTg+JXz2eAX1KM0V0O3Aw8DDxEcTIcn9q+BRgq63s+xTcRngQ+MVX1peU3Ah+saPur+ii+ifFgeoy0oz7gXRTDMg8BD6TH+RS3ov4W8ATwTWBOat8D/I+y/n9M8QHYKPCvp7C+UYqx2QPLvtCJ41enxo6fg7Vqmy7nX9rPbwHfSzU+QvoGUNr/fenf+n8Bs9PyC4B1Zf0/kY7dDuC8Kaxvf9rvgeN6YPmvXiPA76Rz4MH08/JW1+dbIJiZZe6gHroxMzsYOOjNzDLnoDczy5yD3swscw56M7PMOejNzDLnoDczy9z/B8C5FPdY0UE0AAAAAElFTkSuQmCC",
- "text/plain": [
- "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "\u001b[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.\u001b[0m\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Saving model checkpoint to output/checkpoint-500\n",
+ "Configuration saved in output/checkpoint-500/config.json\n",
+ "Model weights saved in output/checkpoint-500/pytorch_model.bin\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1000\n",
+ " Batch size = 1\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Saving model checkpoint to output/checkpoint-1000\n",
+ "Configuration saved in output/checkpoint-1000/config.json\n",
+ "Model weights saved in output/checkpoint-1000/pytorch_model.bin\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1000\n",
+ " Batch size = 1\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Saving model checkpoint to output/checkpoint-1500\n",
+ "Configuration saved in output/checkpoint-1500/config.json\n",
+ "Model weights saved in output/checkpoint-1500/pytorch_model.bin\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 1000\n",
+ " Batch size = 1\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Saving model checkpoint to output/checkpoint-2000\n",
+ "Configuration saved in output/checkpoint-2000/config.json\n",
+ "Model weights saved in output/checkpoint-2000/pytorch_model.bin\n",
+ "\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "Loading best model from output/checkpoint-500 (score: 0.5259436964988708).\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "TrainOutput(global_step=2000, training_loss=0.6015312805175781, metrics={'train_runtime': 1088.7725, 'train_samples_per_second': 11.022, 'train_steps_per_second': 11.022, 'total_flos': 1313722122240000.0, 'train_loss': 0.6015312805175781, 'epoch': 0.5})"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ],
+ "source": [
+ "trainer.train()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "result = trainer.predict(val_dataset)"
+ ],
+ "metadata": {
+ "id": "YTeHJ_c6I2iy",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "outputId": "d0ea9cfe-c6c1-46d4-e606-5dcaa8a6e2ed"
+ },
+ "execution_count": 13,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "***** Running Prediction *****\n",
+ " Num examples = 1000\n",
+ " Batch size = 1\n",
+ "Initializing global attention on CLS token...\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {}
+ },
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n",
+ "Initializing global attention on CLS token...\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(result.metrics)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "RWdJsGGYz9-p",
+ "outputId": "f82c0567-8ab0-4568-90c7-6545d73e6d99"
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "{'test_loss': 0.5259436964988708, 'test_accuracy': 0.625, 'test_precision': 0.625, 'test_recall': 0.625, 'test_f1': 0.625, 'test_runtime': 96.4013, 'test_samples_per_second': 10.373, 'test_steps_per_second': 10.373}\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "filename='model_decoder'\n",
+ "trainer.save_model(filename)"
+ ],
+ "metadata": {
+ "id": "SSAnGmAXZGsT",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "3db06c6b-2efd-44f6-a3ff-481c47f36156"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "Saving model checkpoint to model_decoder\n",
+ "Configuration saved in model_decoder/config.json\n",
+ "Model weights saved in model_decoder/pytorch_model.bin\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ESiFpWJYzcgC"
+ },
+ "source": [
+ "# Model typu encoder-decoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "id": "WYz-wVPoz_tJ",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "e25586d5-4b1c-4588-efde-1da9f0a18cfd"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Requirement already satisfied: sentencepiece==0.1.91 in /usr/local/lib/python3.7/dist-packages (0.1.91)\n",
+ "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n",
+ "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n",
+ "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n",
+ "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n",
+ "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n",
+ "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.11.0)\n",
+ "Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.5)\n",
+ "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n",
+ "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.47)\n",
+ "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n",
+ "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.21.5)\n",
+ "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n",
+ "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n",
+ "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n",
+ "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n",
+ "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n",
+ "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n",
+ "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n",
+ "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# %pip targets the kernel's own environment; both packages are pinned so a fresh runtime resolves the same versions\n",
+ "# (transformers 4.16.2 is the version this notebook was executed with).\n",
+ "%pip install -q sentencepiece==0.1.91 transformers==4.16.2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from google.colab import drive\n",
+ "drive.mount('/content/drive')"
+ ],
+ "metadata": {
+ "id": "phhvsUnGYC-o"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "id": "Ylv54tLmBX6a"
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score\n",
+ "import torch\n",
+ "from transformers import TrainingArguments, Trainer\n",
+ "from transformers import BertTokenizer, BertForSequenceClassification\n",
+ "from transformers import EarlyStoppingCallback\n",
+ "import matplotlib.pyplot as plt"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "id": "_-KcWxdqzgF2",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "44fe6f02-c75b-47d1-f856-866f2fd8c51f"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "loading file https://huggingface.co/t5-small/resolve/main/spiece.model from cache at /root/.cache/huggingface/transformers/65fc04e21f45f61430aea0c4fedffac16a4d20d78b8e6601d8d996ebefefecd2.3b69006860e7b5d0a63ffdddc01ddcd6b7c318a6f4fd793596552c741734c62d\n",
+ "loading file https://huggingface.co/t5-small/resolve/main/added_tokens.json from cache at None\n",
+ "loading file https://huggingface.co/t5-small/resolve/main/special_tokens_map.json from cache at None\n",
+ "loading file https://huggingface.co/t5-small/resolve/main/tokenizer_config.json from cache at None\n",
+ "loading file https://huggingface.co/t5-small/resolve/main/tokenizer.json from cache at /root/.cache/huggingface/transformers/06779097c78e12f47ef67ecb728810c2ae757ee0a9efe9390c6419783d99382d.8627f1bd5d270a9fd2e5a51c8bec3223896587cc3cfe13edeabb0992ab43c529\n",
+ "loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
+ "Model config T5Config {\n",
+ " \"_name_or_path\": \"t5-small\",\n",
+ " \"architectures\": [\n",
+ " \"T5WithLMHeadModel\"\n",
+ " ],\n",
+ " \"d_ff\": 2048,\n",
+ " \"d_kv\": 64,\n",
+ " \"d_model\": 512,\n",
+ " \"decoder_start_token_id\": 0,\n",
+ " \"dropout_rate\": 0.1,\n",
+ " \"eos_token_id\": 1,\n",
+ " \"feed_forward_proj\": \"relu\",\n",
+ " \"initializer_factor\": 1.0,\n",
+ " \"is_encoder_decoder\": true,\n",
+ " \"layer_norm_epsilon\": 1e-06,\n",
+ " \"model_type\": \"t5\",\n",
+ " \"n_positions\": 512,\n",
+ " \"num_decoder_layers\": 6,\n",
+ " \"num_heads\": 8,\n",
+ " \"num_layers\": 6,\n",
+ " \"output_past\": true,\n",
+ " \"pad_token_id\": 0,\n",
+ " \"relative_attention_num_buckets\": 32,\n",
+ " \"task_specific_params\": {\n",
+ " \"summarization\": {\n",
+ " \"early_stopping\": true,\n",
+ " \"length_penalty\": 2.0,\n",
+ " \"max_length\": 200,\n",
+ " \"min_length\": 30,\n",
+ " \"no_repeat_ngram_size\": 3,\n",
+ " \"num_beams\": 4,\n",
+ " \"prefix\": \"summarize: \"\n",
+ " },\n",
+ " \"translation_en_to_de\": {\n",
+ " \"early_stopping\": true,\n",
+ " \"max_length\": 300,\n",
+ " \"num_beams\": 4,\n",
+ " \"prefix\": \"translate English to German: \"\n",
+ " },\n",
+ " \"translation_en_to_fr\": {\n",
+ " \"early_stopping\": true,\n",
+ " \"max_length\": 300,\n",
+ " \"num_beams\": 4,\n",
+ " \"prefix\": \"translate English to French: \"\n",
+ " },\n",
+ " \"translation_en_to_ro\": {\n",
+ " \"early_stopping\": true,\n",
+ " \"max_length\": 300,\n",
+ " \"num_beams\": 4,\n",
+ " \"prefix\": \"translate English to Romanian: \"\n",
+ " }\n",
+ " },\n",
+ " \"transformers_version\": \"4.16.2\",\n",
+ " \"use_cache\": true,\n",
+ " \"vocab_size\": 32128\n",
+ "}\n",
+ "\n",
+ "loading configuration file https://huggingface.co/t5-small/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/fe501e8fd6425b8ec93df37767fcce78ce626e34cc5edc859c662350cf712e41.406701565c0afd9899544c1cb8b93185a76f00b31e5ce7f6e18bbaef02241985\n",
+ "Model config T5Config {\n",
+ " \"architectures\": [\n",
+ " \"T5WithLMHeadModel\"\n",
+ " ],\n",
+ " \"d_ff\": 2048,\n",
+ " \"d_kv\": 64,\n",
+ " \"d_model\": 512,\n",
+ " \"decoder_start_token_id\": 0,\n",
+ " \"dropout_rate\": 0.1,\n",
+ " \"eos_token_id\": 1,\n",
+ " \"feed_forward_proj\": \"relu\",\n",
+ " \"initializer_factor\": 1.0,\n",
+ " \"is_encoder_decoder\": true,\n",
+ " \"layer_norm_epsilon\": 1e-06,\n",
+ " \"model_type\": \"t5\",\n",
+ " \"n_positions\": 512,\n",
+ " \"num_decoder_layers\": 6,\n",
+ " \"num_heads\": 8,\n",
+ " \"num_layers\": 6,\n",
+ " \"output_past\": true,\n",
+ " \"pad_token_id\": 0,\n",
+ " \"relative_attention_num_buckets\": 32,\n",
+ " \"task_specific_params\": {\n",
+ " \"summarization\": {\n",
+ " \"early_stopping\": true,\n",
+ " \"length_penalty\": 2.0,\n",
+ " \"max_length\": 200,\n",
+ " \"min_length\": 30,\n",
+ " \"no_repeat_ngram_size\": 3,\n",
+ " \"num_beams\": 4,\n",
+ " \"prefix\": \"summarize: \"\n",
+ " },\n",
+ " \"translation_en_to_de\": {\n",
+ " \"early_stopping\": true,\n",
+ " \"max_length\": 300,\n",
+ " \"num_beams\": 4,\n",
+ " \"prefix\": \"translate English to German: \"\n",
+ " },\n",
+ " \"translation_en_to_fr\": {\n",
+ " \"early_stopping\": true,\n",
+ " \"max_length\": 300,\n",
+ " \"num_beams\": 4,\n",
+ " \"prefix\": \"translate English to French: \"\n",
+ " },\n",
+ " \"translation_en_to_ro\": {\n",
+ " \"early_stopping\": true,\n",
+ " \"max_length\": 300,\n",
+ " \"num_beams\": 4,\n",
+ " \"prefix\": \"translate English to Romanian: \"\n",
+ " }\n",
+ " },\n",
+ " \"transformers_version\": \"4.16.2\",\n",
+ " \"use_cache\": true,\n",
+ " \"vocab_size\": 32128\n",
+ "}\n",
+ "\n",
+ "loading weights file https://huggingface.co/t5-small/resolve/main/pytorch_model.bin from cache at /root/.cache/huggingface/transformers/fee5a3a0ae379232608b6eed45d2d7a0d2966b9683728838412caccc41b4b0ed.ddacdc89ec88482db20c676f0861a336f3d0409f94748c209847b49529d73885\n",
+ "All model checkpoint weights were used when initializing T5ForConditionalGeneration.\n",
+ "\n",
+ "All the weights of T5ForConditionalGeneration were initialized from the model checkpoint at t5-small.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use T5ForConditionalGeneration for predictions without further training.\n",
+ "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: FutureWarning: The error_bad_lines argument has been deprecated and will be removed in a future version.\n",
+ "\n",
+ "\n",
+ " exec(code_obj, self.user_global_ns, self.user_ns)\n",
+ "Skipping line 16844: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 19370: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 31753: field larger than field limit (131072)\n",
+ "Skipping line 33676: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 65976: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 116130: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 127080: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 154052: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 174200: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 189740: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 274245: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 275624: field larger than field limit (131072)\n",
+ "Skipping line 302668: field larger than field limit (131072)\n",
+ "Skipping line 307322: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 317541: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 333957: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 345859: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 359845: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 359846: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 359847: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 359849: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 371329: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 384761: field larger than field limit (131072)\n",
+ "Skipping line 389712: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 391820: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 398927: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 401260: field larger than field limit (131072)\n",
+ "Skipping line 403079: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 454667: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 465419: field larger than field limit (131072)\n",
+ "Skipping line 466152: field larger than field limit (131072)\n",
+ "Skipping line 485309: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 529874: field larger than field limit (131072)\n",
+ "Skipping line 552169: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 554628: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 560429: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 589855: field larger than field limit (131072)\n",
+ "Skipping line 601507: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 614020: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 630106: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 632882: field larger than field limit (131072)\n",
+ "Skipping line 637573: NULL byte detected. This byte cannot be processed in Python's native csv library at the moment, so please pass in engine='c' instead\n",
+ "Skipping line 658667: field larger than field limit (131072)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from transformers import T5Tokenizer, T5ForConditionalGeneration\n",
+ "from transformers import EvalPrediction\n",
+ "\n",
+ "model_name = \"t5-small\"\n",
+ "tokenizer = T5Tokenizer.from_pretrained(model_name)\n",
+ "model = T5ForConditionalGeneration.from_pretrained(model_name)\n",
+ "\n",
+ "\"\"\"\n",
+ "1 - 22 -> 1 klasa\n",
+ "23 - 31 -> 2 klasa\n",
+ "32 - 39 -> 3 klasa \n",
+ "40 - 48 -> 4 klasa\n",
+ "\"\"\"\n",
+ "\n",
+ "def mapAgeToClass2(value: pd.DataFrame):\n",
+ " if(value['age'] <=22):\n",
+ " # return [1,0,0,0]\n",
+ " return 'class1'\n",
+ " elif(value['age'] > 22 and value['age'] <= 31):\n",
+ " # return [0,1,0,0]\n",
+ " return 'class2'\n",
+ " elif(value['age'] > 31 and value['age'] <= 39):\n",
+ " # return [0,0,1,0]\n",
+ " return 'class3'\n",
+ " else:\n",
+ " # return [0,0,0,1]\n",
+ " return 'class4'\n",
+ "\n",
+ "data_path = 'drive/MyDrive/blogtext.csv'\n",
+ "\n",
+ "# on_bad_lines='warn' is the non-deprecated spelling of error_bad_lines=False:\n",
+ "# malformed rows are still reported on stderr and skipped.\n",
+ "data = pd.read_csv(data_path, on_bad_lines='warn', engine='python')\n",
+ "# Copy the slice so the label column is added to an independent frame\n",
+ "# and the full CSV frame is no longer referenced.\n",
+ "data = data[:data_amount].copy()\n",
+ "data['label'] = data.apply(mapAgeToClass2, axis=1)\n",
+ "\n",
+ "X = list(data['text'])\n",
+ "Y = list(data['label'])\n",
+ "\n",
+ "# Always define `device`, so the name also exists on CPU-only runtimes.\n",
+ "device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\"\n",
+ "if device != \"cpu\":\n",
+ "    torch.cuda.empty_cache()\n",
+ "\n",
+ "# A fixed random_state makes the 80/20 split repeatable across kernel restarts.\n",
+ "X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, random_state=0)\n",
+ "\n",
+ "# Each split is padded to its longest text and truncated at 1024 tokens.\n",
+ "X_train_tokenized = tokenizer(X_train, padding=True, truncation=True, max_length=1024)\n",
+ "X_val_tokenized = tokenizer(X_val, padding=True, truncation=True, max_length=1024)\n",
+ "\n",
+ "class Dataset(torch.utils.data.Dataset):\n",
+ "    \"\"\"Torch dataset pairing tokenized inputs with tokenized text labels.\n",
+ "\n",
+ "    Args:\n",
+ "        encodings: mapping of feature name -> per-example sequences; must contain 'input_ids'.\n",
+ "        labels: optional sequence of label strings, one per example; omit for unlabeled data.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, encodings, labels=None):\n",
+ "        self.encodings = encodings\n",
+ "        self.labels = labels\n",
+ "\n",
+ "    def __getitem__(self, idx):\n",
+ "        \"\"\"Return the idx-th example as a dict of tensors, plus 'labels' when labels were given.\"\"\"\n",
+ "        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}\n",
+ "        # Explicit None/length check instead of truthiness: an array or Series of labels\n",
+ "        # raises on `if labels:`, while an empty list still means 'no labels' as before.\n",
+ "        if self.labels is not None and len(self.labels) > 0:\n",
+ "            # Labels are text, so they are encoded with the module-level `tokenizer` into target token ids.\n",
+ "            item[\"labels\"] = torch.tensor(tokenizer(self.labels[idx])['input_ids'])\n",
+ "        return item\n",
+ "\n",
+ "    def __len__(self):\n",
+ "        \"\"\"Number of examples, taken from the 'input_ids' feature.\"\"\"\n",
+ "        return len(self.encodings[\"input_ids\"])\n",
+ "\n",
+ "train_dataset = Dataset(X_train_tokenized, y_train)\n",
+ "val_dataset = Dataset(X_val_tokenized, y_val)\n",
+ "\n",
+ "def compute_metrics(pred):\n",
+ "    \"\"\"Exact-match accuracy between decoded predictions and decoded reference labels.\n",
+ "\n",
+ "    Args:\n",
+ "        pred: evaluation output exposing `predictions` (token ids) and `label_ids` (reference token ids).\n",
+ "\n",
+ "    Returns:\n",
+ "        A dict with one key, 'accuracy': the fraction of examples whose decoded prediction\n",
+ "        equals the decoded label (0.0 when there are no examples).\n",
+ "    \"\"\"\n",
+ "    pred_ids = pred.predictions\n",
+ "    # Predictions may arrive wrapped in a tuple; the token ids are its first element.\n",
+ "    if isinstance(pred_ids, tuple):\n",
+ "        pred_ids = pred_ids[0]\n",
+ "    pred_ids = np.asarray(pred_ids)\n",
+ "    labels_ids = np.asarray(pred.label_ids)\n",
+ "\n",
+ "    # -100 is the ignore/padding sentinel and not a vocabulary id; swap it for the pad token before decoding.\n",
+ "    pred_ids = np.where(pred_ids != -100, pred_ids, tokenizer.pad_token_id)\n",
+ "    labels_ids = np.where(labels_ids != -100, labels_ids, tokenizer.pad_token_id)\n",
+ "\n",
+ "    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)\n",
+ "    label_str = tokenizer.batch_decode(labels_ids, skip_special_tokens=True)\n",
+ "\n",
+ "    # Guard against an empty evaluation set instead of dividing by zero.\n",
+ "    if not pred_str:\n",
+ "        return {\"accuracy\": 0.0}\n",
+ "\n",
+ "    # Surrounding whitespace is not meaningful for the class label, so it is stripped before comparing.\n",
+ "    matches = sum(p.strip() == l.strip() for p, l in zip(pred_str, label_str))\n",
+ "    return {\"accuracy\": matches / len(pred_str)}\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer\n",
+ "\n",
+ "# Training configuration, grouped by concern. Keyword order does not affect the result.\n",
+ "args = Seq2SeqTrainingArguments(\n",
+ "    # where checkpoints go / reproducibility\n",
+ "    output_dir=\"output\",\n",
+ "    seed=0,\n",
+ "    # schedule and batch sizes\n",
+ "    num_train_epochs=3,\n",
+ "    per_device_train_batch_size=8,\n",
+ "    per_device_eval_batch_size=8,\n",
+ "    # evaluate every 50 steps, generating sequences so the metric sees decoded text\n",
+ "    evaluation_strategy=\"steps\",\n",
+ "    eval_steps=50,\n",
+ "    predict_with_generate=True,\n",
+ "    load_best_model_at_end=True,\n",
+ ")\n",
+ "\n",
+ "# Wire the model, the two datasets and the metric function into the trainer.\n",
+ "trainer = Seq2SeqTrainer(\n",
+ "    args=args,\n",
+ "    model=model,\n",
+ "    compute_metrics=compute_metrics,\n",
+ "    train_dataset=train_dataset,\n",
+ "    eval_dataset=val_dataset,\n",
+ ")"
+ ],
+ "metadata": {
+ "id": "XayaHmAMgI1x",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "4c32a772-96bc-4a43-b406-110c5f311932"
+ },
+ "execution_count": 19,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "PyTorch: setting up devices\n",
+ "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "id": "8nVY24TCz1Mi",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "outputId": "b4542048-d208-463a-b088-df9645f8b92d"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.7/dist-packages/transformers/optimization.py:309: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " FutureWarning,\n",
+ "***** Running training *****\n",
+ " Num examples = 4000\n",
+ " Num Epochs = 3\n",
+ " Instantaneous batch size per device = 8\n",
+ " Total train batch size (w. parallel, distributed & accumulation) = 8\n",
+ " Gradient Accumulation steps = 1\n",
+ " Total optimization steps = 1500\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ "