projekt-glebokie/GPT_2.ipynb
2023-02-12 23:36:28 +01:00


{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
"gpuClass": "standard",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"16f1b324020d48c3a8fd4487c42bbd6b": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_df036cb95c56454998cb7c788d341584",
"IPY_MODEL_4a0387dee622459498ddc9d7bf201187",
"IPY_MODEL_604840d710474c71a676c8368c9b3f2f"
],
"layout": "IPY_MODEL_7e3d18de5d554030bd6aa801ac7f3192"
}
},
"df036cb95c56454998cb7c788d341584": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_419e5a2aab8147d79490748f633675cd",
"placeholder": "",
"style": "IPY_MODEL_41452374c2a64afd82d30744b36dd801",
"value": "100%"
}
},
"4a0387dee622459498ddc9d7bf201187": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_ea94ef66672d4b69bf0d5eac6f7dada3",
"max": 3,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_a7e7dd3a259a4a878cfcd6a66ed35c7c",
"value": 3
}
},
"604840d710474c71a676c8368c9b3f2f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_275a9313a7e14c57b66cbe484499c8ec",
"placeholder": "",
"style": "IPY_MODEL_c01651c9c010429bbf5507770ce6b6ce",
"value": " 3/3 [00:00<00:00, 136.35it/s]"
}
},
"7e3d18de5d554030bd6aa801ac7f3192": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"419e5a2aab8147d79490748f633675cd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"41452374c2a64afd82d30744b36dd801": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ea94ef66672d4b69bf0d5eac6f7dada3": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"a7e7dd3a259a4a878cfcd6a66ed35c7c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"275a9313a7e14c57b66cbe484499c8ec": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c01651c9c010429bbf5507770ce6b6ce": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
}
}
}
},
"cells": [
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "JErLYXsaYy8-"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"! pip install datasets transformers torch scikit-learn evaluate"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "u29i-U30zRjY",
"outputId": "55534ca2-097f-4e7a-a517-463f974148cf"
},
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Collecting datasets\n",
" Downloading datasets-2.9.0-py3-none-any.whl (462 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m462.8/462.8 KB\u001b[0m \u001b[31m8.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting transformers\n",
" Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.3/6.3 MB\u001b[0m \u001b[31m55.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (1.13.1+cu116)\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.0.2)\n",
"Collecting evaluate\n",
" Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 KB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from datasets) (1.21.6)\n",
"Collecting xxhash\n",
" Downloading xxhash-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (213 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.0/213.0 KB\u001b[0m \u001b[31m2.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (2023.1.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
"Collecting responses<0.19\n",
" Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (6.0)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
"Collecting huggingface-hub<1.0.0,>=0.2.0\n",
" Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m190.3/190.3 KB\u001b[0m \u001b[31m16.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: dill<0.3.7 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.3.6)\n",
"Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (4.64.1)\n",
"Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (2.25.1)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from datasets) (1.3.5)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from datasets) (23.0)\n",
"Collecting multiprocess\n",
" Downloading multiprocess-0.70.14-py38-none-any.whl (132 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.0/132.0 KB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n",
"Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n",
" Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m51.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch) (4.4.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n",
"Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (4.0.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (2022.12.7)\n",
"Collecting urllib3<1.27,>=1.21.1\n",
" Downloading urllib3-1.26.14-py2.py3-none-any.whl (140 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m140.6/140.6 KB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hRequirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2022.7.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n",
"Installing collected packages: tokenizers, xxhash, urllib3, multiprocess, responses, huggingface-hub, transformers, datasets, evaluate\n",
" Attempting uninstall: urllib3\n",
" Found existing installation: urllib3 1.24.3\n",
" Uninstalling urllib3-1.24.3:\n",
" Successfully uninstalled urllib3-1.24.3\n",
"Successfully installed datasets-2.9.0 evaluate-0.4.0 huggingface-hub-0.12.0 multiprocess-0.70.14 responses-0.18.0 tokenizers-0.13.2 transformers-4.26.1 urllib3-1.26.14 xxhash-3.2.0\n"
]
}
]
},
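{
"cell_type": "markdown",
"source": [
"Optional reproducibility note (a sketch, not executed in this run): the install above is unpinned, so the exact versions it resolved to, taken from its log, can be pinned to rebuild the same environment later."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Pin the versions that the unpinned install above resolved to (see its log)\n",
"! pip install datasets==2.9.0 transformers==4.26.1 evaluate==0.4.0"
],
"metadata": {},
"execution_count": null,
"outputs": []
},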
{
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "a_f-yno_zity"
}
},
{
"cell_type": "code",
"source": [
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/run_glue.py' -O 'run_glue.py'\n",
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/roberta.py' -O 'roberta.py'\n",
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/gpt2.py' -O 'gpt2.py'"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "V_HmRNcmzhsw",
"outputId": "feafb930-4dbf-436c-8e37-de4e8b8a32cc"
},
"execution_count": 2,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-02-12 21:57:57-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/run_glue.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 30601 (30K) [text/plain]\n",
"Saving to: run_glue.py\n",
"\n",
"run_glue.py 100%[===================>] 29.88K --.-KB/s in 0.1s \n",
"\n",
"2023-02-12 21:57:58 (248 KB/s) - run_glue.py saved [30601/30601]\n",
"\n",
"--2023-02-12 21:57:58-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/roberta.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 12783 (12K) [text/plain]\n",
"Saving to: roberta.py\n",
"\n",
"roberta.py 100%[===================>] 12.48K --.-KB/s in 0s \n",
"\n",
"2023-02-12 21:57:58 (265 MB/s) - roberta.py saved [12783/12783]\n",
"\n",
"--2023-02-12 21:57:58-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/gpt2.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 7976 (7.8K) [text/plain]\n",
"Saving to: gpt2.py\n",
"\n",
"gpt2.py 100%[===================>] 7.79K --.-KB/s in 0s \n",
"\n",
"2023-02-12 21:57:59 (1.37 GB/s) - gpt2.py saved [7976/7976]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import json\n",
"from pathlib import Path\n",
"from typing import Dict, List\n",
"from datasets import load_dataset\n",
"\n",
"loaded_data = load_dataset('emotion')\n",
"!mkdir -v -p data\n",
"\n",
"train_path = Path('data/train.json')\n",
"valid_path = Path('data/valid.json')\n",
"test_path = Path('data/test.json')\n",
"data_train, data_valid, data_test = [], [], []\n",
"\n",
"for source_data, dataset, max_size in [\n",
" (loaded_data['train'], data_train, None),\n",
" (loaded_data['validation'], data_valid, None),\n",
" (loaded_data['test'], data_test, None),\n",
"]:\n",
" for i, data in enumerate(source_data):\n",
" if max_size is not None and i >= max_size:\n",
" break\n",
" data_line = {\n",
" 'label': int(data['label']),\n",
" 'text': data['text'],\n",
" }\n",
" dataset.append(data_line)\n",
"\n",
"print(f'Train: {len(data_train):6d}')\n",
"print(f'Valid: {len(data_valid):6d}')\n",
"\n",
"data_class_1, data_class_2 = [], []\n",
"\n",
"\"\"\"for data in data_valid:\n",
" label = data['label']\n",
" if label == 0:\n",
" data_class_1.append(data)\n",
" elif label == 1:\n",
" data_class_2.append(data)\n",
"\n",
"print(f'Label 1: {len(data_class_1):6d}')\n",
"print(f'Label 2: {len(data_class_2):6d}')\n",
"\n",
"size_half_class_1 = int(len(data_class_1) / 2)\n",
"size_half_class_2 = int(len(data_class_2) / 2)\n",
"\n",
"data_valid = data_class_1[:size_half_class_1] + data_class_2[:size_half_class_2]\n",
"data_test = data_class_1[size_half_class_1:] + data_class_2[size_half_class_2:]\n",
"\"\"\"\n",
"\n",
"print(f'Valid: {len(data_valid):6d}')\n",
"print(f'Test : {len(data_test):6d}')\n",
"\n",
"MAP_LABEL_TRANSLATION = {\n",
" 0: 'sadness',\n",
" 1: 'joy',\n",
" 2: 'love',\n",
" 3: 'anger',\n",
" 4: 'fear',\n",
" 5: 'surprise',\n",
"}\n",
"\n",
"def save_as_translations(original_save_path: Path, data_to_save: List[Dict]) -> None:\n",
" file_name = 's2s-' + original_save_path.name\n",
" file_path = original_save_path.parent / file_name\n",
"\n",
" print(f'Saving into: {file_path}')\n",
" with open(file_path, 'wt') as f_write:\n",
" for data_line in data_to_save:\n",
" label = data_line['label']\n",
" new_label = MAP_LABEL_TRANSLATION[label]\n",
" data_line['label'] = new_label\n",
" data_line_str = json.dumps(data_line)\n",
" f_write.write(f'{data_line_str}\\n')\n",
"\n",
"for file_path, data_to_save in [(train_path, data_train), (valid_path, data_valid), (test_path, data_test)]:\n",
" print(f'Saving into: {file_path}')\n",
" with open(file_path, 'wt') as f_write:\n",
" for data_line in data_to_save:\n",
" data_line_str = json.dumps(data_line)\n",
" f_write.write(f'{data_line_str}\\n')\n",
" \n",
" save_as_translations(file_path, data_to_save)\n",
"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 295,
"referenced_widgets": [
"16f1b324020d48c3a8fd4487c42bbd6b",
"df036cb95c56454998cb7c788d341584",
"4a0387dee622459498ddc9d7bf201187",
"604840d710474c71a676c8368c9b3f2f",
"7e3d18de5d554030bd6aa801ac7f3192",
"419e5a2aab8147d79490748f633675cd",
"41452374c2a64afd82d30744b36dd801",
"ea94ef66672d4b69bf0d5eac6f7dada3",
"a7e7dd3a259a4a878cfcd6a66ed35c7c",
"275a9313a7e14c57b66cbe484499c8ec",
"c01651c9c010429bbf5507770ce6b6ce"
]
},
"id": "bcR4tWQl0rqt",
"outputId": "6a2bad78-8eb7-4a90-c839-b7cc470438d7"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"WARNING:datasets.builder:No config specified, defaulting to: emotion/split\n",
"WARNING:datasets.builder:Found cached dataset emotion (/root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)\n"
]
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/3 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "16f1b324020d48c3a8fd4487c42bbd6b"
}
},
"metadata": {}
},
{
"output_type": "stream",
"name": "stdout",
"text": [
"mkdir: created directory 'data'\n",
"Train: 16000\n",
"Valid: 2000\n",
"Valid: 2000\n",
"Test : 2000\n",
"Saving into: data/train.json\n",
"Saving into: data/s2s-train.json\n",
"Saving into: data/valid.json\n",
"Saving into: data/s2s-valid.json\n",
"Saving into: data/test.json\n",
"Saving into: data/s2s-test.json\n"
]
}
]
},
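{
"cell_type": "markdown",
"source": [
"Quick sanity check (a sketch, not executed in this run): each line of `data/train.json` holds an integer label, while the `s2s-` variant of the same file stores the label as its word form (e.g. `sadness`), as produced by `save_as_translations` above."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"# Compare one line of the numeric-label file with its word-label counterpart\n",
"!head -n 1 data/train.json data/s2s-train.json"
],
"metadata": {},
"execution_count": null,
"outputs": []
},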
{
"cell_type": "code",
"source": [
"\n",
"!head -n 4500 data/train.json > data/train-5k.json\n",
"!tail -n 2500 data/train.json >> data/train-5k.json\n",
"!wc -l data/train-5k.json"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "pRmHIvyB0fZe",
"outputId": "a6f163f0-a393-431c-92e9-aaaf04601832"
},
"execution_count": 5,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"7000 data/train-5k.json\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"from pathlib import Path\n",
"\n",
"for file_name in [\"train\", \"valid\", \"test\", \"s2s-train\", \"s2s-valid\", \"s2s-test\"]:\n",
" print(f\"=== {file_name} ===\")\n",
" all_text = Path(f\"data/{file_name}.json\").read_text().split('\\n')\n",
" text = all_text[:2500] + all_text[-2500:]\n",
" Path(f\"data/{file_name}-5k.json\").write_text(\"\\n\".join(text))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rFa6ijdx2L28",
"outputId": "3303cd99-beba-4685-d8a0-80f819b1b50d"
},
"execution_count": 6,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"=== train ===\n",
"=== valid ===\n",
"=== test ===\n",
"=== s2s-train ===\n",
"=== s2s-valid ===\n",
"=== s2s-test ===\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"import os\n",
"\n",
"os.environ['TOKENIZERS_PARALLELISM'] = 'true'"
],
"metadata": {
"id": "8opbDvBv3ZlK"
},
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "pxuxjHt8P57X"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!python run_glue.py \\\n",
"--cache_dir .cache_training \\\n",
"--model_name_or_path gpt2 \\\n",
"--custom_model gpt2_hidden \\\n",
"--freeze_weights \\\n",
"--train_file data/s2s-train.json \\\n",
"--validation_file data/s2s-valid.json \\\n",
"--test_file data/s2s-test.json \\\n",
"--per_device_train_batch_size 24 \\\n",
"--per_device_eval_batch_size 24 \\\n",
"--do_train \\\n",
"--do_eval \\\n",
"--do_predict \\\n",
"--max_seq_length 128 \\\n",
"--learning_rate 2e-5 \\\n",
"--num_train_epochs 5 \\\n",
"--output_dir out/imdb-5k/gpt2"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "XkkeRPG_z3Jc",
"outputId": "ffdacc37-5c06-401a-a588-a1d272dd72b0"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"2023-02-12 22:00:15.880386: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-02-12 22:00:16.771169: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-12 22:00:16.771276: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-12 22:00:16.771294: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
"greater_is_better=None,\n",
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
"learning_rate=2e-05,\n",
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
"logging_dir=out/imdb-5k/gpt2/runs/Feb12_22-00-19_506c7abe63fb,\n",
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
"metric_for_best_model=None,\n",
"mp_parameters=,\n",
"no_cuda=False,\n",
"num_train_epochs=5.0,\n",
"optim=adamw_hf,\n",
"optim_args=None,\n",
"output_dir=out/imdb-5k/gpt2,\n",
"overwrite_output_dir=False,\n",
"past_index=-1,\n",
"per_device_eval_batch_size=24,\n",
"per_device_train_batch_size=24,\n",
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
"run_name=out/imdb-5k/gpt2,\n",
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
"torch_compile_backend=None,\n",
"torch_compile_mode=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
"INFO:__main__:load a local file for train: data/s2s-train.json\n",
"INFO:__main__:load a local file for validation: data/s2s-valid.json\n",
"INFO:__main__:load a local file for test: data/s2s-test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-623c8a7b15a2e58a\n",
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
"INFO:datasets.builder:Generating dataset json (/content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 14873.42it/s]\n",
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
"Extracting data files: 100% 3/3 [00:00<00:00, 1763.55it/s]\n",
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
"Dataset json downloaded and prepared to /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 1028.86it/s]\n",
"Downloading (…)lve/main/config.json: 100% 665/665 [00:00<00:00, 126kB/s]\n",
"[INFO|configuration_utils.py:660] 2023-02-12 22:00:20,342 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 22:00:20,343 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"[INFO|tokenization_auto.py:458] 2023-02-12 22:00:20,434 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:660] 2023-02-12 22:00:20,525 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 22:00:20,525 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"Downloading (…)olve/main/vocab.json: 100% 1.04M/1.04M [00:00<00:00, 9.19MB/s]\n",
"Downloading (…)olve/main/merges.txt: 100% 456k/456k [00:00<00:00, 4.93MB/s]\n",
"Downloading (…)/main/tokenizer.json: 100% 1.36M/1.36M [00:00<00:00, 11.9MB/s]\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,743 >> loading file vocab.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,743 >> loading file merges.txt from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,743 >> loading file tokenizer.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,744 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,744 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 22:00:21,744 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:660] 2023-02-12 22:00:21,744 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 22:00:21,745 >> Model config GPT2Config {\n",
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: True\n",
"INFO:__main__:Using implementation from class: GPT2ForSequenceClassificationCustom\n",
"Downloading (…)\"pytorch_model.bin\";: 100% 548M/548M [00:05<00:00, 103MB/s]\n",
"[INFO|modeling_utils.py:2275] 2023-02-12 22:00:27,304 >> loading weights file pytorch_model.bin from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2857] 2023-02-12 22:00:30,150 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
"\n",
"[WARNING|modeling_utils.py:2859] 2023-02-12 22:00:30,150 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.dense_1_hidden.weight', 'score.dense_2.weight', 'score.out_proj.weight', 'score.dense_2.bias', 'score.dense_1_hidden.bias', 'score.dense_1_input.bias', 'score.dense_1_input.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
"INFO:__main__:Freezing encoder weights\n",
"INFO:__main__:Freezing layer 1\n",
"INFO:__main__:Freezing layer 2\n",
"INFO:__main__:Freezing layer 3\n",
"INFO:__main__:Freezing layer 4\n",
"INFO:__main__:Freezing layer 5\n",
"INFO:__main__:Freezing layer 6\n",
"INFO:__main__:Freezing layer 7\n",
"INFO:__main__:Freezing layer 8\n",
"INFO:__main__:Freezing layer 9\n",
"INFO:__main__:Freezing layer 10\n",
"INFO:__main__:Freezing layer 11\n",
"INFO:__main__:Freezing layer 12\n",
"INFO:__main__:Freezing layer 13\n",
"INFO:__main__:Freezing layer 14\n",
"INFO:__main__:Freezing layer 15\n",
"INFO:__main__:Freezing layer 16\n",
"INFO:__main__:Freezing layer 17\n",
"INFO:__main__:Freezing layer 18\n",
"INFO:__main__:Freezing layer 19\n",
"INFO:__main__:Freezing layer 20\n",
"INFO:__main__:Freezing layer 21\n",
"INFO:__main__:Freezing layer 22\n",
"INFO:__main__:Freezing layer 23\n",
"INFO:__main__:Freezing layer 24\n",
"INFO:__main__:Freezing layer 25\n",
"INFO:__main__:Freezing layer 26\n",
"INFO:__main__:Freezing layer 27\n",
"INFO:__main__:Freezing layer 28\n",
"INFO:__main__:Freezing layer 29\n",
"INFO:__main__:Freezing layer 30\n",
"INFO:__main__:Freezing layer 31\n",
"INFO:__main__:Freezing layer 32\n",
"INFO:__main__:Freezing layer 33\n",
"INFO:__main__:Freezing layer 34\n",
"INFO:__main__:Freezing layer 35\n",
"INFO:__main__:Freezing layer 36\n",
"INFO:__main__:Freezing layer 37\n",
"INFO:__main__:Freezing layer 38\n",
"INFO:__main__:Freezing layer 39\n",
"INFO:__main__:Freezing layer 40\n",
"INFO:__main__:Ignoring layer 41\n",
"INFO:__main__:Ignoring layer 42\n",
"INFO:__main__:Ignoring layer 43\n",
"INFO:__main__:Ignoring layer 44\n",
"INFO:__main__:Ignoring layer 45\n",
"INFO:__main__:Ignoring layer 46\n",
"INFO:__main__:Ignoring layer 47\n",
"INFO:__main__:Ignoring layer 48\n",
"INFO:__main__:Ignoring layer 49\n",
"INFO:__main__:Ignoring layer 50\n",
"INFO:__main__:Ignoring layer 51\n",
"INFO:__main__:Ignoring layer 52\n",
"INFO:__main__:Ignoring layer 53\n",
"INFO:__main__:Ignoring layer 54\n",
"INFO:__main__:Ignoring layer 55\n",
"INFO:__main__:Ignoring layer 56\n",
"INFO:__main__:Ignoring layer 57\n",
"INFO:__main__:Ignoring layer 58\n",
"INFO:__main__:Ignoring layer 59\n",
"INFO:__main__:Ignoring layer 60\n",
"INFO:__main__:Ignoring layer 61\n",
"INFO:__main__:Ignoring layer 62\n",
"INFO:__main__:Ignoring layer 63\n",
"INFO:__main__:Ignoring layer 64\n",
"INFO:__main__:Ignoring layer 65\n",
"INFO:__main__:Ignoring layer 66\n",
"INFO:__main__:Ignoring layer 67\n",
"INFO:__main__:Ignoring layer 68\n",
"INFO:__main__:Ignoring layer 69\n",
"INFO:__main__:Ignoring layer 70\n",
"INFO:__main__:Ignoring layer 71\n",
"INFO:__main__:Ignoring layer 72\n",
"INFO:__main__:Ignoring layer 73\n",
"INFO:__main__:Ignoring layer 74\n",
"INFO:__main__:Ignoring layer 75\n",
"INFO:__main__:Ignoring layer 76\n",
"INFO:__main__:Ignoring layer 77\n",
"INFO:__main__:Ignoring layer 78\n",
"INFO:__main__:Ignoring layer 79\n",
"INFO:__main__:Ignoring layer 80\n",
"INFO:__main__:Ignoring layer 81\n",
"INFO:__main__:Ignoring layer 82\n",
"INFO:__main__:Ignoring layer 83\n",
"INFO:__main__:Ignoring layer 84\n",
"INFO:__main__:Ignoring layer 85\n",
"INFO:__main__:Ignoring layer 86\n",
"INFO:__main__:Ignoring layer 87\n",
"INFO:__main__:Ignoring layer 88\n",
"INFO:__main__:Ignoring layer 89\n",
"INFO:__main__:Ignoring layer 90\n",
"INFO:__main__:Ignoring layer 91\n",
"INFO:__main__:Ignoring layer 92\n",
"INFO:__main__:Ignoring layer 93\n",
"INFO:__main__:Ignoring layer 94\n",
"INFO:__main__:Ignoring layer 95\n",
"INFO:__main__:Ignoring layer 96\n",
"INFO:__main__:Ignoring layer 97\n",
"INFO:__main__:Ignoring layer 98\n",
"INFO:__main__:Ignoring layer 99\n",
"INFO:__main__:Ignoring layer 100\n",
"INFO:__main__:Ignoring layer 101\n",
"INFO:__main__:Ignoring layer 102\n",
"INFO:__main__:Ignoring layer 103\n",
"INFO:__main__:Ignoring layer 104\n",
"INFO:__main__:Ignoring layer 105\n",
"INFO:__main__:Ignoring layer 106\n",
"INFO:__main__:Ignoring layer 107\n",
"INFO:__main__:Ignoring layer 108\n",
"INFO:__main__:Ignoring layer 109\n",
"INFO:__main__:Ignoring layer 110\n",
"INFO:__main__:Ignoring layer 111\n",
"INFO:__main__:Ignoring layer 112\n",
"INFO:__main__:Ignoring layer 113\n",
"INFO:__main__:Ignoring layer 114\n",
"INFO:__main__:Ignoring layer 115\n",
"INFO:__main__:Ignoring layer 116\n",
"INFO:__main__:Ignoring layer 117\n",
"INFO:__main__:Ignoring layer 118\n",
"INFO:__main__:Ignoring layer 119\n",
"INFO:__main__:Ignoring layer 120\n",
"INFO:__main__:Ignoring layer 121\n",
"INFO:__main__:Ignoring layer 122\n",
"INFO:__main__:Ignoring layer 123\n",
"INFO:__main__:Ignoring layer 124\n",
"INFO:__main__:Ignoring layer 125\n",
"INFO:__main__:Ignoring layer 126\n",
"INFO:__main__:Ignoring layer 127\n",
"INFO:__main__:Ignoring layer 128\n",
"INFO:__main__:Ignoring layer 129\n",
"INFO:__main__:Ignoring layer 130\n",
"INFO:__main__:Ignoring layer 131\n",
"INFO:__main__:Ignoring layer 132\n",
"INFO:__main__:Ignoring layer 133\n",
"INFO:__main__:Ignoring layer 134\n",
"INFO:__main__:Ignoring layer 135\n",
"INFO:__main__:Ignoring layer 136\n",
"INFO:__main__:Ignoring layer 137\n",
"INFO:__main__:Ignoring layer 138\n",
"INFO:__main__:Ignoring layer 139\n",
"INFO:__main__:Ignoring layer 140\n",
"INFO:__main__:Ignoring layer 141\n",
"INFO:__main__:Ignoring layer 142\n",
"INFO:__main__:Ignoring layer 143\n",
"INFO:__main__:Ignoring layer 144\n",
"INFO:__main__:Ignoring layer 145\n",
"INFO:__main__:Ignoring layer 146\n",
"INFO:__main__:Ignoring layer 147\n",
"INFO:__main__:Ignoring layer 148\n",
"INFO:__main__:Ignoring layer 149\n",
"INFO:__main__:Ignoring layer 150\n",
"INFO:__main__:Ignoring layer 151\n",
"INFO:__main__:Ignoring layer 152\n",
"INFO:__main__:Ignoring layer 153\n",
"INFO:__main__:Ignoring layer 154\n",
"INFO:__main__:Ignoring layer 155\n",
"[ERROR|tokenization_utils_base.py:1042] 2023-02-12 22:00:30,162 >> Using pad_token, but it is not set yet.\n",
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-c8be49ce7f75c3ae.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:01<00:00, 11.14ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-01058c80c2124fd4.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 6.04ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-623c8a7b15a2e58a/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-53937ce94683c2e2.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 11.40ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 4, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 2, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 3, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"Downloading builder script: 100% 4.20k/4.20k [00:00<00:00, 4.82MB/s]\n",
"[INFO|trainer.py:710] 2023-02-12 22:00:38,984 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"[INFO|trainer.py:1650] 2023-02-12 22:00:38,993 >> ***** Running training *****\n",
"[INFO|trainer.py:1651] 2023-02-12 22:00:38,993 >> Num examples = 16000\n",
"[INFO|trainer.py:1652] 2023-02-12 22:00:38,993 >> Num Epochs = 5\n",
"[INFO|trainer.py:1653] 2023-02-12 22:00:38,993 >> Instantaneous batch size per device = 24\n",
"[INFO|trainer.py:1654] 2023-02-12 22:00:38,993 >> Total train batch size (w. parallel, distributed & accumulation) = 24\n",
"[INFO|trainer.py:1655] 2023-02-12 22:00:38,993 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1656] 2023-02-12 22:00:38,993 >> Total optimization steps = 3335\n",
"[INFO|trainer.py:1657] 2023-02-12 22:00:38,994 >> Number of trainable parameters = 68517888\n",
"{'loss': 1.0593, 'learning_rate': 1.7001499250374815e-05, 'epoch': 0.75}\n",
" 15% 500/3335 [04:16<24:04, 1.96it/s][INFO|trainer.py:2709] 2023-02-12 22:04:55,709 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-500\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:04:55,710 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:04:57,444 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:04:57,444 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:04:57,444 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.3829, 'learning_rate': 1.4002998500749626e-05, 'epoch': 1.5}\n",
" 30% 1000/3335 [08:36<19:51, 1.96it/s][INFO|trainer.py:2709] 2023-02-12 22:09:15,813 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-1000\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:09:15,814 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:09:17,628 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:09:17,629 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:09:17,630 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.256, 'learning_rate': 1.100449775112444e-05, 'epoch': 2.25}\n",
" 45% 1500/3335 [12:56<15:43, 1.95it/s][INFO|trainer.py:2709] 2023-02-12 22:13:36,008 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-1500\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:13:36,009 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:13:37,703 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:13:37,704 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:13:37,704 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.2101, 'learning_rate': 8.005997001499251e-06, 'epoch': 3.0}\n",
" 60% 2000/3335 [17:17<11:23, 1.95it/s][INFO|trainer.py:2709] 2023-02-12 22:17:56,308 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-2000\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:17:56,309 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:17:58,005 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:17:58,006 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:17:58,006 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-2000/special_tokens_map.json\n",
"{'loss': 0.17, 'learning_rate': 5.0074962518740634e-06, 'epoch': 3.75}\n",
" 75% 2500/3335 [21:37<07:05, 1.96it/s][INFO|trainer.py:2709] 2023-02-12 22:22:16,686 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-2500\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:22:16,687 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-2500/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:22:18,356 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-2500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:22:18,357 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-2500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:22:18,357 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-2500/special_tokens_map.json\n",
"{'loss': 0.1569, 'learning_rate': 2.008995502248876e-06, 'epoch': 4.5}\n",
" 90% 3000/3335 [25:57<02:51, 1.95it/s][INFO|trainer.py:2709] 2023-02-12 22:26:36,938 >> Saving model checkpoint to out/imdb-5k/gpt2/checkpoint-3000\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:26:36,939 >> Configuration saved in out/imdb-5k/gpt2/checkpoint-3000/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:26:38,608 >> Model weights saved in out/imdb-5k/gpt2/checkpoint-3000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:26:38,608 >> tokenizer config file saved in out/imdb-5k/gpt2/checkpoint-3000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:26:38,608 >> Special tokens file saved in out/imdb-5k/gpt2/checkpoint-3000/special_tokens_map.json\n",
"100% 3335/3335 [28:53<00:00, 2.15it/s][INFO|trainer.py:1901] 2023-02-12 22:29:32,259 >> \n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
"{'train_runtime': 1733.281, 'train_samples_per_second': 46.155, 'train_steps_per_second': 1.924, 'train_loss': 0.35007504373118614, 'epoch': 5.0}\n",
"100% 3335/3335 [28:53<00:00, 1.92it/s]\n",
"[INFO|trainer.py:2709] 2023-02-12 22:29:32,277 >> Saving model checkpoint to out/imdb-5k/gpt2\n",
"[INFO|configuration_utils.py:453] 2023-02-12 22:29:32,278 >> Configuration saved in out/imdb-5k/gpt2/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 22:29:33,934 >> Model weights saved in out/imdb-5k/gpt2/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 22:29:33,934 >> tokenizer config file saved in out/imdb-5k/gpt2/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 22:29:33,934 >> Special tokens file saved in out/imdb-5k/gpt2/special_tokens_map.json\n",
"***** train metrics *****\n",
" epoch = 5.0\n",
" train_loss = 0.3501\n",
" train_runtime = 0:28:53.28\n",
" train_samples = 16000\n",
" train_samples_per_second = 46.155\n",
" train_steps_per_second = 1.924\n",
"INFO:__main__:*** Evaluate ***\n",
"[INFO|trainer.py:710] 2023-02-12 22:29:34,047 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2964] 2023-02-12 22:29:34,108 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2966] 2023-02-12 22:29:34,108 >> Num examples = 2000\n",
"[INFO|trainer.py:2969] 2023-02-12 22:29:34,108 >> Batch size = 24\n",
"100% 84/84 [00:17<00:00, 4.83it/s]\n",
"***** eval metrics *****\n",
" epoch = 5.0\n",
" eval_accuracy = 0.93\n",
" eval_loss = 0.1531\n",
" eval_runtime = 0:00:17.72\n",
" eval_samples = 2000\n",
" eval_samples_per_second = 112.855\n",
" eval_steps_per_second = 4.74\n",
"INFO:__main__:*** Predict ***\n",
"[INFO|trainer.py:710] 2023-02-12 22:29:51,834 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2964] 2023-02-12 22:29:51,836 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2966] 2023-02-12 22:29:51,836 >> Num examples = 2000\n",
"[INFO|trainer.py:2969] 2023-02-12 22:29:51,836 >> Batch size = 24\n",
"100% 84/84 [00:17<00:00, 4.81it/s]\n",
"INFO:__main__:***** Predict results None *****\n",
"[INFO|modelcard.py:449] 2023-02-12 22:30:09,657 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9300000071525574}]}\n"
]
}
]
}
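,
{
"cell_type": "markdown",
"source": [
"A minimal inference sketch (not executed in this notebook). It assumes that `gpt2.py` defines the `GPT2ForSequenceClassificationCustom` class reported in the training log and that the class supports the standard `from_pretrained` API; the checkpoint path is the `output_dir` used above."
],
"metadata": {}
},
{
"cell_type": "code",
"source": [
"import torch\n",
"from transformers import AutoTokenizer\n",
"# Assumption: the custom classification model class is defined in the downloaded gpt2.py\n",
"from gpt2 import GPT2ForSequenceClassificationCustom\n",
"\n",
"checkpoint = 'out/imdb-5k/gpt2'  # output_dir of the training run above\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
"model = GPT2ForSequenceClassificationCustom.from_pretrained(checkpoint)\n",
"model.eval()\n",
"\n",
"inputs = tokenizer('i feel hopeful and powerful', return_tensors='pt')\n",
"with torch.no_grad():\n",
"    logits = model(**inputs).logits\n",
"# id2label comes from the saved config (falls back to LABEL_0..LABEL_5 if the\n",
"# training script did not rename the labels)\n",
"print(model.config.id2label[logits.argmax(dim=-1).item()])"
],
"metadata": {},
"execution_count": null,
"outputs": []
}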
]
}