projekt-glebokie/GPT_2.ipynb

1490 lines
79 KiB
Plaintext
Raw Permalink Normal View History

2023-02-10 22:26:16 +01:00
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU",
2023-02-12 20:03:40 +01:00
"gpuClass": "standard",
2023-02-10 22:26:16 +01:00
"widgets": {
"application/vnd.jupyter.widget-state+json": {
2023-02-13 00:49:10 +01:00
"f0fc084b95e0408a9d77d4051a540f2d": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
2023-02-13 00:49:10 +01:00
"IPY_MODEL_f32509354c994a148ece1bf2f5d2fb66",
"IPY_MODEL_1cfa03eaaa7f4750af69da815f3f8360",
"IPY_MODEL_69e5f2a83b884fc7a640accaa27b5600"
2023-02-10 22:26:16 +01:00
],
2023-02-13 00:49:10 +01:00
"layout": "IPY_MODEL_b16b3d7a825a4435bab3dd8bdb26702d"
2023-02-10 22:26:16 +01:00
}
},
2023-02-13 00:49:10 +01:00
"f32509354c994a148ece1bf2f5d2fb66": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
2023-02-13 00:49:10 +01:00
"layout": "IPY_MODEL_0f169cc9432649b9bc990ebed23faa47",
2023-02-10 22:26:16 +01:00
"placeholder": "",
2023-02-13 00:49:10 +01:00
"style": "IPY_MODEL_e5e7f54b635748da9fb170c6819e6368",
2023-02-12 20:03:40 +01:00
"value": "100%"
2023-02-10 22:26:16 +01:00
}
},
2023-02-13 00:49:10 +01:00
"1cfa03eaaa7f4750af69da815f3f8360": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
2023-02-13 00:49:10 +01:00
"layout": "IPY_MODEL_c3361a78031047bca9494db148aa9c60",
2023-02-10 22:26:16 +01:00
"max": 3,
"min": 0,
"orientation": "horizontal",
2023-02-13 00:49:10 +01:00
"style": "IPY_MODEL_c0376b60cd6643a4b14c5f88f1feabfd",
2023-02-10 22:26:16 +01:00
"value": 3
}
},
2023-02-13 00:49:10 +01:00
"69e5f2a83b884fc7a640accaa27b5600": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"model_module_version": "1.5.0",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
2023-02-13 00:49:10 +01:00
"layout": "IPY_MODEL_8582e82344404f68a3f89033e0f4987e",
2023-02-10 22:26:16 +01:00
"placeholder": "",
2023-02-13 00:49:10 +01:00
"style": "IPY_MODEL_ba03ab4c843c42909fbeb4ff411186d6",
"value": " 3/3 [00:00<00:00, 31.73it/s]"
2023-02-10 22:26:16 +01:00
}
},
2023-02-13 00:49:10 +01:00
"b16b3d7a825a4435bab3dd8bdb26702d": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
2023-02-13 00:49:10 +01:00
"0f169cc9432649b9bc990ebed23faa47": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
2023-02-13 00:49:10 +01:00
"e5e7f54b635748da9fb170c6819e6368": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
2023-02-13 00:49:10 +01:00
"c3361a78031047bca9494db148aa9c60": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
2023-02-13 00:49:10 +01:00
"c0376b60cd6643a4b14c5f88f1feabfd": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
2023-02-13 00:49:10 +01:00
"8582e82344404f68a3f89033e0f4987e": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"model_module_version": "1.2.0",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
2023-02-13 00:49:10 +01:00
"ba03ab4c843c42909fbeb4ff411186d6": {
2023-02-10 22:26:16 +01:00
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"model_module_version": "1.5.0",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
2023-02-12 20:03:40 +01:00
}
}
}
},
"cells": [
{
2023-02-13 00:49:10 +01:00
"cell_type": "markdown",
"source": [
"# Setup"
],
2023-02-12 20:03:40 +01:00
"metadata": {
2023-02-13 00:49:10 +01:00
"id": "n2A5EThJNiAy"
}
2023-02-12 20:03:40 +01:00
},
{
2023-02-13 00:49:10 +01:00
"cell_type": "markdown",
2023-02-12 20:03:40 +01:00
"source": [
2023-02-13 00:49:10 +01:00
"## Requirements"
2023-02-12 20:03:40 +01:00
],
2023-02-13 00:49:10 +01:00
"metadata": {
"id": "tPp2_1rDOFYA"
}
},
{
"cell_type": "code",
"execution_count": null,
2023-02-12 20:03:40 +01:00
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
2023-02-10 22:26:16 +01:00
},
2023-02-13 00:49:10 +01:00
"id": "OmsX3kG4bLTg",
"outputId": "cd31b31c-3840-490c-b57f-18edfe8d847a"
2023-02-12 20:03:40 +01:00
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
2023-02-13 00:49:10 +01:00
"Requirement already satisfied: torch in /usr/local/lib/python3.8/dist-packages (1.13.1+cu116)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch) (4.4.0)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: datasets in /usr/local/lib/python3.8/dist-packages (2.9.0)\n",
2023-02-12 22:17:41 +01:00
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n",
2023-02-12 23:36:28 +01:00
"Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (2.25.1)\n",
2023-02-13 00:49:10 +01:00
"Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (4.64.1)\n",
2023-02-12 23:36:28 +01:00
"Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from datasets) (1.3.5)\n",
2023-02-13 00:49:10 +01:00
"Requirement already satisfied: huggingface-hub<1.0.0,>=0.2.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.12.0)\n",
"Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.18.0)\n",
"Requirement already satisfied: dill<0.3.7 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.3.6)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n",
"Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from datasets) (3.2.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from datasets) (1.21.6)\n",
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from datasets) (0.70.14)\n",
"Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (2023.1.0)\n",
2023-02-12 23:36:28 +01:00
"Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from datasets) (23.0)\n",
2023-02-13 00:49:10 +01:00
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (6.0)\n",
2023-02-12 23:36:28 +01:00
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n",
2023-02-13 00:49:10 +01:00
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n",
2023-02-12 20:03:40 +01:00
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n",
2023-02-12 22:17:41 +01:00
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n",
2023-02-13 00:49:10 +01:00
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (4.4.0)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (3.9.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (2022.12.7)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (1.26.14)\n",
2023-02-12 20:03:40 +01:00
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (4.0.0)\n",
2023-02-12 22:17:41 +01:00
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (2.10)\n",
2023-02-12 23:36:28 +01:00
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2.8.2)\n",
2023-02-13 00:49:10 +01:00
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2022.7.1)\n",
2023-02-12 20:03:40 +01:00
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n",
2023-02-13 00:49:10 +01:00
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: transformers in /usr/local/lib/python3.8/dist-packages (4.26.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (2022.6.2)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.13.2)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from transformers) (6.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from transformers) (1.21.6)\n",
"Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.8/dist-packages (from transformers) (4.64.1)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers) (3.9.0)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.11.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (0.12.0)\n",
"Requirement already satisfied: requests in /usr/local/lib/python3.8/dist-packages (from transformers) (2.25.1)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from transformers) (23.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0,>=0.11.0->transformers) (4.4.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (1.26.14)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2.10)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (4.0.0)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests->transformers) (2022.12.7)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: scikit-learn in /usr/local/lib/python3.8/dist-packages (1.0.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (3.1.0)\n",
"Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.2.0)\n",
"Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.7.3)\n",
"Requirement already satisfied: numpy>=1.14.6 in /usr/local/lib/python3.8/dist-packages (from scikit-learn) (1.21.6)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: evaluate in /usr/local/lib/python3.8/dist-packages (0.4.0)\n",
"Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.8/dist-packages (from evaluate) (2023.1.0)\n",
"Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.8/dist-packages (from evaluate) (2.25.1)\n",
"Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from evaluate) (1.3.5)\n",
"Requirement already satisfied: multiprocess in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.70.14)\n",
"Requirement already satisfied: huggingface-hub>=0.7.0 in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.12.0)\n",
"Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from evaluate) (23.0)\n",
"Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.8/dist-packages (from evaluate) (4.64.1)\n",
"Requirement already satisfied: dill in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.3.6)\n",
"Requirement already satisfied: datasets>=2.0.0 in /usr/local/lib/python3.8/dist-packages (from evaluate) (2.9.0)\n",
"Requirement already satisfied: responses<0.19 in /usr/local/lib/python3.8/dist-packages (from evaluate) (0.18.0)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from evaluate) (1.21.6)\n",
"Requirement already satisfied: xxhash in /usr/local/lib/python3.8/dist-packages (from evaluate) (3.2.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from datasets>=2.0.0->evaluate) (6.0)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets>=2.0.0->evaluate) (9.0.0)\n",
"Requirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets>=2.0.0->evaluate) (3.8.3)\n",
"Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.7.0->evaluate) (3.9.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub>=0.7.0->evaluate) (4.4.0)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->evaluate) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->evaluate) (2022.12.7)\n",
"Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->evaluate) (4.0.0)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->evaluate) (1.26.14)\n",
"Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2022.7.1)\n",
"Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->evaluate) (2.8.2)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (6.0.4)\n",
"Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (22.2.0)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.1)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.3.3)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (1.8.2)\n",
"Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (2.1.1)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets>=2.0.0->evaluate) (4.0.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->evaluate) (1.15.0)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: accelerate in /usr/local/lib/python3.8/dist-packages (0.16.0)\n",
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.8/dist-packages (from accelerate) (23.0)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.8/dist-packages (from accelerate) (5.4.8)\n",
"Requirement already satisfied: torch>=1.4.0 in /usr/local/lib/python3.8/dist-packages (from accelerate) (1.13.1+cu116)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from accelerate) (1.21.6)\n",
"Requirement already satisfied: pyyaml in /usr/local/lib/python3.8/dist-packages (from accelerate) (6.0)\n",
"Requirement already satisfied: typing-extensions in /usr/local/lib/python3.8/dist-packages (from torch>=1.4.0->accelerate) (4.4.0)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: sentencepiece in /usr/local/lib/python3.8/dist-packages (0.1.97)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: protobuf in /usr/local/lib/python3.8/dist-packages (3.19.6)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: sacrebleu in /usr/local/lib/python3.8/dist-packages (2.3.1)\n",
"Requirement already satisfied: regex in /usr/local/lib/python3.8/dist-packages (from sacrebleu) (2022.6.2)\n",
"Requirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.8/dist-packages (from sacrebleu) (0.8.10)\n",
"Requirement already satisfied: colorama in /usr/local/lib/python3.8/dist-packages (from sacrebleu) (0.4.6)\n",
"Requirement already satisfied: portalocker in /usr/local/lib/python3.8/dist-packages (from sacrebleu) (2.7.0)\n",
"Requirement already satisfied: lxml in /usr/local/lib/python3.8/dist-packages (from sacrebleu) (4.9.2)\n",
"Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from sacrebleu) (1.21.6)\n",
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
"Requirement already satisfied: py7zr in /usr/local/lib/python3.8/dist-packages (0.20.4)\n",
"Requirement already satisfied: inflate64>=0.3.1 in /usr/local/lib/python3.8/dist-packages (from py7zr) (0.3.1)\n",
"Requirement already satisfied: pybcj>=0.6.0 in /usr/local/lib/python3.8/dist-packages (from py7zr) (1.0.1)\n",
"Requirement already satisfied: pyzstd>=0.14.4 in /usr/local/lib/python3.8/dist-packages (from py7zr) (0.15.3)\n",
"Requirement already satisfied: multivolumefile>=0.2.3 in /usr/local/lib/python3.8/dist-packages (from py7zr) (0.2.3)\n",
"Requirement already satisfied: pycryptodomex>=3.6.6 in /usr/local/lib/python3.8/dist-packages (from py7zr) (3.17)\n",
"Requirement already satisfied: brotli>=1.0.9 in /usr/local/lib/python3.8/dist-packages (from py7zr) (1.0.9)\n",
"Requirement already satisfied: psutil in /usr/local/lib/python3.8/dist-packages (from py7zr) (5.4.8)\n",
"Requirement already satisfied: texttable in /usr/local/lib/python3.8/dist-packages (from py7zr) (1.6.7)\n",
"Requirement already satisfied: pyppmd<1.1.0,>=0.18.1 in /usr/local/lib/python3.8/dist-packages (from py7zr) (1.0.0)\n"
2023-02-12 20:03:40 +01:00
]
}
2023-02-13 00:49:10 +01:00
],
"source": [
"!pip install torch\n",
"!pip install datasets\n",
"!pip install transformers\n",
"!pip install scikit-learn\n",
"!pip install evaluate\n",
"!pip install accelerate\n",
"!pip install sentencepiece\n",
"!pip install protobuf\n",
"!pip install sacrebleu\n",
"!pip install py7zr\n"
2023-02-12 20:03:40 +01:00
]
},
{
"cell_type": "markdown",
2023-02-13 00:49:10 +01:00
"source": [
"## Imports"
],
2023-02-12 20:03:40 +01:00
"metadata": {
2023-02-13 00:49:10 +01:00
"id": "o3Kj9IzuOKMi"
2023-02-12 20:03:40 +01:00
}
},
{
"cell_type": "code",
"source": [
2023-02-13 00:49:10 +01:00
"import os\n",
"import json\n",
"import torch\n",
"from google.colab import drive\n",
"from pathlib import Path\n",
"from typing import Dict, List\n",
"from datasets import load_dataset\n",
"from transformers import T5Tokenizer"
],
"metadata": {
"id": "r92S06noeSWE"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"## Loading data"
],
"metadata": {
"id": "2UzLo91gNnsA"
}
},
{
"cell_type": "code",
"source": [
"loaded_data = load_dataset('emotion')\n",
"!mkdir -v -p data\n",
"train_path = Path('data/train.json')\n",
"valid_path = Path('data/valid.json')\n",
"test_path = Path('data/test.json')\n",
"data_train, data_valid, data_test = [], [], []"
2023-02-12 20:03:40 +01:00
],
"metadata": {
"colab": {
2023-02-13 00:49:10 +01:00
"base_uri": "https://localhost:8080/",
"height": 84,
"referenced_widgets": [
"f0fc084b95e0408a9d77d4051a540f2d",
"f32509354c994a148ece1bf2f5d2fb66",
"1cfa03eaaa7f4750af69da815f3f8360",
"69e5f2a83b884fc7a640accaa27b5600",
"b16b3d7a825a4435bab3dd8bdb26702d",
"0f169cc9432649b9bc990ebed23faa47",
"e5e7f54b635748da9fb170c6819e6368",
"c3361a78031047bca9494db148aa9c60",
"c0376b60cd6643a4b14c5f88f1feabfd",
"8582e82344404f68a3f89033e0f4987e",
"ba03ab4c843c42909fbeb4ff411186d6"
]
2023-02-10 22:26:16 +01:00
},
2023-02-13 00:49:10 +01:00
"id": "n_miey7eb2Xr",
"outputId": "273a8199-b14f-4a19-f9e1-a2961c2653bc"
2023-02-12 20:03:40 +01:00
},
2023-02-13 00:49:10 +01:00
"execution_count": null,
2023-02-12 20:03:40 +01:00
"outputs": [
{
"output_type": "stream",
2023-02-13 00:49:10 +01:00
"name": "stderr",
2023-02-12 20:03:40 +01:00
"text": [
2023-02-13 00:49:10 +01:00
"WARNING:datasets.builder:No config specified, defaulting to: emotion/split\n",
"WARNING:datasets.builder:Found cached dataset emotion (/root/.cache/huggingface/datasets/emotion/split/1.0.0/cca5efe2dfeb58c1d098e0f9eeb200e9927d889b5a03c67097275dfb5fe463bd)\n"
2023-02-10 22:26:16 +01:00
]
2023-02-13 00:49:10 +01:00
},
{
"output_type": "display_data",
"data": {
"text/plain": [
" 0%| | 0/3 [00:00<?, ?it/s]"
],
"application/vnd.jupyter.widget-view+json": {
"version_major": 2,
"version_minor": 0,
"model_id": "f0fc084b95e0408a9d77d4051a540f2d"
}
},
"metadata": {}
2023-02-10 22:26:16 +01:00
}
]
},
{
"cell_type": "code",
2023-02-12 20:03:40 +01:00
"source": [
"for source_data, dataset, max_size in [\n",
" (loaded_data['train'], data_train, None),\n",
2023-02-12 23:36:28 +01:00
" (loaded_data['validation'], data_valid, None),\n",
" (loaded_data['test'], data_test, None),\n",
2023-02-12 20:03:40 +01:00
"]:\n",
" for i, data in enumerate(source_data):\n",
" if max_size is not None and i >= max_size:\n",
" break\n",
" data_line = {\n",
" 'label': int(data['label']),\n",
" 'text': data['text'],\n",
" }\n",
" dataset.append(data_line)\n",
"\n",
"print(f'Train: {len(data_train):6d}')\n",
"print(f'Valid: {len(data_valid):6d}')\n",
2023-02-13 00:49:10 +01:00
"print(f'Test: {len(data_test):6d}')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "BZ6afaRzGsxS",
"outputId": "139aaaf0-ea67-4ed2-bfa4-68caa7dd61e8"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Train: 16000\n",
"Valid: 2000\n",
"Test: 2000\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Label id -> emotion name; the position in the tuple is the integer id.\n",
"MAP_LABEL_TRANSLATION = dict(enumerate((\n",
"    'sadness',\n",
"    'joy',\n",
"    'love',\n",
"    'anger',\n",
"    'fear',\n",
"    'surprise',\n",
")))"
],
"metadata": {
"id": "w0KyM4TrGxQY"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"def save_as_translations(original_save_path: Path, data_to_save: List[Dict]) -> None:\n",
"    \"\"\"Write a copy of `data_to_save` with textual labels, one JSON object per line.\n",
"\n",
"    The output file is placed next to `original_save_path` and its name is the\n",
"    original file name prefixed with `s2s-`. In every written record the\n",
"    `label` value is replaced by its MAP_LABEL_TRANSLATION entry.\n",
"\n",
"    The records in `data_to_save` are not modified, so the function can be\n",
"    called again on the same list (for example when the calling cell is re-run).\n",
"\n",
"    Args:\n",
"        original_save_path: path of the untranslated file; only its parent\n",
"            directory and file name are used.\n",
"        data_to_save: records holding at least a `label` key whose value is a\n",
"            key of MAP_LABEL_TRANSLATION.\n",
"\n",
"    Raises:\n",
"        KeyError: if a record's label is not a key of MAP_LABEL_TRANSLATION.\n",
"    \"\"\"\n",
"    file_name = 's2s-' + original_save_path.name\n",
"    file_path = original_save_path.parent / file_name\n",
"\n",
"    print(f'Saving into: {file_path}')\n",
"    with open(file_path, 'wt') as f_write:\n",
"        for data_line in data_to_save:\n",
"            # Build a new dict rather than assigning into `data_line`, so the\n",
"            # caller's records keep their original labels. Key order is kept.\n",
"            translated_line = {**data_line, 'label': MAP_LABEL_TRANSLATION[data_line['label']]}\n",
"            data_line_str = json.dumps(translated_line)\n",
"            f_write.write(f'{data_line_str}\\n')"
],
"metadata": {
"id": "-EFRYeAYHIKN"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"for file_path, data_to_save in (\n",
"    (train_path, data_train),\n",
"    (valid_path, data_valid),\n",
"    (test_path, data_test),\n",
"):\n",
"    # Plain dump first: one JSON object per line, records written as stored.\n",
"    print(f'Saving into: {file_path}')\n",
"    with open(file_path, 'wt') as f_write:\n",
"        f_write.writelines(f'{json.dumps(record)}\\n' for record in data_to_save)\n",
"\n",
"    # Then the `s2s-` prefixed variant of the same split.\n",
"    save_as_translations(file_path, data_to_save)"
2023-02-12 20:03:40 +01:00
],
2023-02-10 22:26:16 +01:00
"metadata": {
"colab": {
2023-02-13 00:49:10 +01:00
"base_uri": "https://localhost:8080/"
2023-02-10 22:26:16 +01:00
},
2023-02-13 00:49:10 +01:00
"id": "7RsrTNGCHIqc",
"outputId": "5cc59bc4-f71a-4b7b-ff27-f0f638f19fc9"
2023-02-10 22:26:16 +01:00
},
2023-02-13 00:49:10 +01:00
"execution_count": null,
2023-02-10 22:26:16 +01:00
"outputs": [
{
2023-02-12 20:03:40 +01:00
"output_type": "stream",
"name": "stdout",
"text": [
"Saving into: data/train.json\n",
"Saving into: data/s2s-train.json\n",
"Saving into: data/valid.json\n",
"Saving into: data/s2s-valid.json\n",
"Saving into: data/test.json\n",
"Saving into: data/s2s-test.json\n"
]
}
]
},
{
"cell_type": "code",
"source": [
2023-02-13 00:49:10 +01:00
"!head data/train.json"
2023-02-12 20:03:40 +01:00
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
2023-02-13 00:49:10 +01:00
"id": "Svu6YYSaHK4t",
"outputId": "3d90aaa5-7477-4d26-a1ce-8d830fe51178"
2023-02-12 20:03:40 +01:00
},
2023-02-13 00:49:10 +01:00
"execution_count": null,
2023-02-12 20:03:40 +01:00
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
2023-02-13 00:49:10 +01:00
"{\"label\": 0, \"text\": \"i didnt feel humiliated\"}\n",
"{\"label\": 0, \"text\": \"i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake\"}\n",
"{\"label\": 3, \"text\": \"im grabbing a minute to post i feel greedy wrong\"}\n",
"{\"label\": 2, \"text\": \"i am ever feeling nostalgic about the fireplace i will know that it is still on the property\"}\n",
"{\"label\": 3, \"text\": \"i am feeling grouchy\"}\n",
"{\"label\": 0, \"text\": \"ive been feeling a little burdened lately wasnt sure why that was\"}\n",
"{\"label\": 5, \"text\": \"ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny\"}\n",
"{\"label\": 4, \"text\": \"i feel as confused about life as a teenager or as jaded as a year old man\"}\n",
"{\"label\": 1, \"text\": \"i have been with petronas for years i feel that petronas has performed well and made a huge profit\"}\n",
"{\"label\": 2, \"text\": \"i feel romantic too\"}\n"
2023-02-12 20:03:40 +01:00
]
}
]
},
{
"cell_type": "code",
"source": [
2023-02-13 00:49:10 +01:00
"!head data/s2s-train.json"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5INZa4ZJHQbn",
"outputId": "12a2bbf0-fe51-4d63-de46-de63182657a9"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"{\"label\": \"sadness\", \"text\": \"i didnt feel humiliated\"}\n",
"{\"label\": \"sadness\", \"text\": \"i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake\"}\n",
"{\"label\": \"anger\", \"text\": \"im grabbing a minute to post i feel greedy wrong\"}\n",
"{\"label\": \"love\", \"text\": \"i am ever feeling nostalgic about the fireplace i will know that it is still on the property\"}\n",
"{\"label\": \"anger\", \"text\": \"i am feeling grouchy\"}\n",
"{\"label\": \"sadness\", \"text\": \"ive been feeling a little burdened lately wasnt sure why that was\"}\n",
"{\"label\": \"surprise\", \"text\": \"ive been taking or milligrams or times recommended amount and ive fallen asleep a lot faster but i also feel like so funny\"}\n",
"{\"label\": \"fear\", \"text\": \"i feel as confused about life as a teenager or as jaded as a year old man\"}\n",
"{\"label\": \"joy\", \"text\": \"i have been with petronas for years i feel that petronas has performed well and made a huge profit\"}\n",
"{\"label\": \"love\", \"text\": \"i feel romantic too\"}\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# create tiny datasets for debugging purposes\n",
"for file_name in [\"train\", \"valid\", \"test\"]:\n",
2023-02-12 20:03:40 +01:00
" print(f\"=== {file_name} ===\")\n",
" all_text = Path(f\"data/{file_name}.json\").read_text().split('\\n')\n",
2023-02-13 00:49:10 +01:00
" text = all_text[:250] + all_text[-250:]\n",
" Path(f\"data/{file_name}-500.json\").write_text(\"\\n\".join(text))"
2023-02-12 20:03:40 +01:00
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
2023-02-10 22:26:16 +01:00
},
2023-02-13 00:49:10 +01:00
"id": "OYeI-JvepSf7",
"outputId": "9f2a4bf8-a8c5-4ffb-b3f1-b1fe1157d997"
2023-02-12 20:03:40 +01:00
},
2023-02-13 00:49:10 +01:00
"execution_count": null,
2023-02-12 20:03:40 +01:00
"outputs": [
2023-02-10 22:26:16 +01:00
{
2023-02-12 20:03:40 +01:00
"output_type": "stream",
"name": "stdout",
"text": [
"=== train ===\n",
"=== valid ===\n",
2023-02-13 00:49:10 +01:00
"=== test ===\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!wc -l data/*"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "_WSOgm50LI0m",
"outputId": "2d4df642-b657-4e00-9b3b-c1408f7beb40"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" 2000 data/s2s-test.json\n",
" 16000 data/s2s-train.json\n",
" 2000 data/s2s-valid.json\n",
" 499 data/test-500.json\n",
" 2000 data/test.json\n",
" 499 data/train-500.json\n",
" 16000 data/train.json\n",
" 499 data/valid-500.json\n",
" 2000 data/valid.json\n",
" 41497 total\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# GPU Info"
],
"metadata": {
"id": "b78jArQhN2Jb"
}
},
{
"cell_type": "code",
"source": [
"!nvidia-smi"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TZk2ZwJML4Wz",
"outputId": "4fd092bf-813e-4e83-9b13-cf3d62baf56f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Sun Feb 12 23:30:18 2023 \n",
"+-----------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 510.47.03 Driver Version: 510.47.03 CUDA Version: 11.6 |\n",
"|-------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|===============================+======================+======================|\n",
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
"| N/A 56C P0 26W / 70W | 0MiB / 15360MiB | 0% Default |\n",
"| | | N/A |\n",
"+-------------------------------+----------------------+----------------------+\n",
" \n",
"+-----------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=============================================================================|\n",
"| No running processes found |\n",
"+-----------------------------------------------------------------------------+\n"
2023-02-12 20:03:40 +01:00
]
}
]
},
{
"cell_type": "code",
"source": [
"os.environ['TOKENIZERS_PARALLELISM'] = 'true'"
],
"metadata": {
2023-02-13 00:49:10 +01:00
"id": "e-ssYW1WL71Y"
2023-02-12 20:03:40 +01:00
},
2023-02-13 00:49:10 +01:00
"execution_count": null,
2023-02-12 20:03:40 +01:00
"outputs": []
},
2023-02-13 00:49:10 +01:00
{
"cell_type": "markdown",
"source": [
"# Run"
],
"metadata": {
"id": "gMK8qKF_dq5s"
}
},
{
"cell_type": "code",
"source": [
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/run_glue.py' -O 'run_glue.py'"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f-NS2jDZdsMd",
"outputId": "445b069d-8628-4924-8d57-a0aaa0e8b964"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-02-12 23:30:18-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/run_glue.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 30601 (30K) [text/plain]\n",
"Saving to: run_glue.py\n",
"\n",
"run_glue.py 100%[===================>] 29.88K --.-KB/s in 0.03s \n",
"\n",
"2023-02-12 23:30:18 (982 KB/s) - run_glue.py saved [30601/30601]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/roberta.py' -O 'roberta.py'"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "rdUCXArmhliH",
"outputId": "f01832ae-2206-4f47-ae10-e50ed0d71c45"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-02-12 23:30:18-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/roberta.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 12783 (12K) [text/plain]\n",
"Saving to: roberta.py\n",
"\n",
"roberta.py 100%[===================>] 12.48K --.-KB/s in 0s \n",
"\n",
"2023-02-12 23:30:18 (263 MB/s) - roberta.py saved [12783/12783]\n",
"\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"!wget 'https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/gpt2.py' -O 'gpt2.py'"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nw9Y56QukENR",
"outputId": "fe01c608-1dfe-4e4c-fd79-d6301fe0a2fe"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2023-02-12 23:30:18-- https://git.wmi.amu.edu.pl/s444465/projekt-glebokie/raw/branch/master/gpt2.py\n",
"Resolving git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)... 150.254.78.40\n",
"Connecting to git.wmi.amu.edu.pl (git.wmi.amu.edu.pl)|150.254.78.40|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 8017 (7.8K) [text/plain]\n",
"Saving to: gpt2.py\n",
"\n",
"gpt2.py 100%[===================>] 7.83K --.-KB/s in 0s \n",
"\n",
"2023-02-12 23:30:19 (1.42 GB/s) - gpt2.py saved [8017/8017]\n",
"\n"
]
}
]
},
2023-02-12 20:03:40 +01:00
{
"cell_type": "code",
2023-02-13 00:49:10 +01:00
"source": [
"torch.cuda.empty_cache()"
],
2023-02-12 20:03:40 +01:00
"metadata": {
2023-02-13 00:49:10 +01:00
"id": "2iIR3yh8dyPZ"
2023-02-12 20:03:40 +01:00
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
2023-02-13 00:49:10 +01:00
"! python run_glue.py \\\n",
" --cache_dir .cache_training \\\n",
" --model_name_or_path gpt2 \\\n",
" --custom_model gpt2_hidden \\\n",
" --train_file data/train.json \\\n",
" --validation_file data/valid.json \\\n",
" --test_file data/test.json \\\n",
" --per_device_train_batch_size 8 \\\n",
" --per_device_eval_batch_size 8 \\\n",
" --do_train \\\n",
" --do_eval \\\n",
" --do_predict \\\n",
" --max_seq_length 128 \\\n",
" --num_train_epochs 1 \\\n",
" --metric_for_best_model accuracy \\\n",
" --greater_is_better True \\\n",
" --overwrite_output_dir \\\n",
" --output_dir out/emotion/gpt2"
2023-02-12 20:03:40 +01:00
],
"metadata": {
"colab": {
2023-02-12 23:36:28 +01:00
"base_uri": "https://localhost:8080/"
2023-02-10 22:26:16 +01:00
},
2023-02-13 00:49:10 +01:00
"id": "6KFVQFCqdyw6",
"outputId": "09b3934c-bc30-4349-e25a-af24544f86f3"
2023-02-12 20:03:40 +01:00
},
2023-02-13 00:49:10 +01:00
"execution_count": null,
2023-02-12 20:03:40 +01:00
"outputs": [
2023-02-10 22:26:16 +01:00
{
"output_type": "stream",
"name": "stdout",
"text": [
2023-02-13 00:49:10 +01:00
"2023-02-12 23:30:29.286531: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-12 23:30:29.287316: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia\n",
"2023-02-12 23:30:29.287348: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n",
2023-02-12 20:03:40 +01:00
"WARNING:__main__:Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: False\n",
"INFO:__main__:Training/evaluation parameters TrainingArguments(\n",
"_n_gpu=1,\n",
"adafactor=False,\n",
"adam_beta1=0.9,\n",
"adam_beta2=0.999,\n",
"adam_epsilon=1e-08,\n",
"auto_find_batch_size=False,\n",
"bf16=False,\n",
"bf16_full_eval=False,\n",
"data_seed=None,\n",
"dataloader_drop_last=False,\n",
"dataloader_num_workers=0,\n",
"dataloader_pin_memory=True,\n",
"ddp_bucket_cap_mb=None,\n",
"ddp_find_unused_parameters=None,\n",
"ddp_timeout=1800,\n",
"debug=[],\n",
"deepspeed=None,\n",
"disable_tqdm=False,\n",
"do_eval=True,\n",
"do_predict=True,\n",
"do_train=True,\n",
"eval_accumulation_steps=None,\n",
"eval_delay=0,\n",
"eval_steps=None,\n",
"evaluation_strategy=no,\n",
"fp16=False,\n",
"fp16_backend=auto,\n",
"fp16_full_eval=False,\n",
"fp16_opt_level=O1,\n",
"fsdp=[],\n",
"fsdp_min_num_params=0,\n",
"fsdp_transformer_layer_cls_to_wrap=None,\n",
"full_determinism=False,\n",
"gradient_accumulation_steps=1,\n",
"gradient_checkpointing=False,\n",
2023-02-13 00:49:10 +01:00
"greater_is_better=True,\n",
2023-02-12 20:03:40 +01:00
"group_by_length=False,\n",
"half_precision_backend=auto,\n",
"hub_model_id=None,\n",
"hub_private_repo=False,\n",
"hub_strategy=every_save,\n",
"hub_token=<HUB_TOKEN>,\n",
"ignore_data_skip=False,\n",
"include_inputs_for_metrics=False,\n",
"jit_mode_eval=False,\n",
"label_names=None,\n",
"label_smoothing_factor=0.0,\n",
2023-02-13 00:49:10 +01:00
"learning_rate=5e-05,\n",
2023-02-12 20:03:40 +01:00
"length_column_name=length,\n",
"load_best_model_at_end=False,\n",
"local_rank=-1,\n",
"log_level=passive,\n",
"log_level_replica=passive,\n",
"log_on_each_node=True,\n",
2023-02-13 00:49:10 +01:00
"logging_dir=out/emotion/gpt2/runs/Feb12_23-30-34_2740c0a1a5dc,\n",
2023-02-12 20:03:40 +01:00
"logging_first_step=False,\n",
"logging_nan_inf_filter=True,\n",
"logging_steps=500,\n",
"logging_strategy=steps,\n",
"lr_scheduler_type=linear,\n",
"max_grad_norm=1.0,\n",
"max_steps=-1,\n",
2023-02-13 00:49:10 +01:00
"metric_for_best_model=accuracy,\n",
2023-02-12 20:03:40 +01:00
"mp_parameters=,\n",
"no_cuda=False,\n",
2023-02-13 00:49:10 +01:00
"num_train_epochs=1.0,\n",
2023-02-12 20:03:40 +01:00
"optim=adamw_hf,\n",
"optim_args=None,\n",
2023-02-13 00:49:10 +01:00
"output_dir=out/emotion/gpt2,\n",
"overwrite_output_dir=True,\n",
2023-02-12 20:03:40 +01:00
"past_index=-1,\n",
2023-02-13 00:49:10 +01:00
"per_device_eval_batch_size=8,\n",
"per_device_train_batch_size=8,\n",
2023-02-12 20:03:40 +01:00
"prediction_loss_only=False,\n",
"push_to_hub=False,\n",
"push_to_hub_model_id=None,\n",
"push_to_hub_organization=None,\n",
"push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
"ray_scope=last,\n",
"remove_unused_columns=True,\n",
"report_to=['tensorboard'],\n",
"resume_from_checkpoint=None,\n",
2023-02-13 00:49:10 +01:00
"run_name=out/emotion/gpt2,\n",
2023-02-12 20:03:40 +01:00
"save_on_each_node=False,\n",
"save_steps=500,\n",
"save_strategy=steps,\n",
"save_total_limit=None,\n",
"seed=42,\n",
"sharded_ddp=[],\n",
"skip_memory_metrics=True,\n",
"tf32=None,\n",
"torch_compile=False,\n",
"torch_compile_backend=None,\n",
"torch_compile_mode=None,\n",
"torchdynamo=None,\n",
"tpu_metrics_debug=False,\n",
"tpu_num_cores=None,\n",
"use_ipex=False,\n",
"use_legacy_prediction_loop=False,\n",
"use_mps_device=False,\n",
"warmup_ratio=0.0,\n",
"warmup_steps=0,\n",
"weight_decay=0.0,\n",
"xpu_backend=None,\n",
")\n",
2023-02-13 00:49:10 +01:00
"INFO:__main__:load a local file for train: data/train.json\n",
"INFO:__main__:load a local file for validation: data/valid.json\n",
"INFO:__main__:load a local file for test: data/test.json\n",
"WARNING:datasets.builder:Using custom data configuration default-79a9e082059ced07\n",
2023-02-12 20:03:40 +01:00
"INFO:datasets.info:Loading Dataset Infos from /usr/local/lib/python3.8/dist-packages/datasets/packaged_modules/json\n",
2023-02-13 00:49:10 +01:00
"INFO:datasets.builder:Generating dataset json (/content/.cache_training/json/default-79a9e082059ced07/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n",
"Downloading and preparing dataset json/default to /content/.cache_training/json/default-79a9e082059ced07/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n",
"Downloading data files: 100% 3/3 [00:00<00:00, 12384.76it/s]\n",
2023-02-12 20:03:40 +01:00
"INFO:datasets.download.download_manager:Downloading took 0.0 min\n",
"INFO:datasets.download.download_manager:Checksum Computation took 0.0 min\n",
2023-02-13 00:49:10 +01:00
"Extracting data files: 100% 3/3 [00:00<00:00, 1936.13it/s]\n",
2023-02-12 20:03:40 +01:00
"INFO:datasets.utils.info_utils:Unable to verify checksums.\n",
"INFO:datasets.builder:Generating train split\n",
"INFO:datasets.builder:Generating validation split\n",
"INFO:datasets.builder:Generating test split\n",
"INFO:datasets.utils.info_utils:Unable to verify splits sizes.\n",
2023-02-13 00:49:10 +01:00
"Dataset json downloaded and prepared to /content/.cache_training/json/default-79a9e082059ced07/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n",
"100% 3/3 [00:00<00:00, 989.92it/s]\n",
"[INFO|configuration_utils.py:660] 2023-02-12 23:30:36,613 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 23:30:36,614 >> Model config GPT2Config {\n",
2023-02-12 20:03:40 +01:00
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"id2label\": {\n",
" \"0\": \"LABEL_0\",\n",
" \"1\": \"LABEL_1\",\n",
" \"2\": \"LABEL_2\",\n",
" \"3\": \"LABEL_3\",\n",
" \"4\": \"LABEL_4\",\n",
" \"5\": \"LABEL_5\"\n",
" },\n",
" \"initializer_range\": 0.02,\n",
" \"label2id\": {\n",
" \"LABEL_0\": 0,\n",
" \"LABEL_1\": 1,\n",
" \"LABEL_2\": 2,\n",
" \"LABEL_3\": 3,\n",
" \"LABEL_4\": 4,\n",
" \"LABEL_5\": 5\n",
" },\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
2023-02-13 00:49:10 +01:00
"[INFO|tokenization_auto.py:458] 2023-02-12 23:30:36,976 >> Could not locate the tokenizer configuration file, will try to use the model config instead.\n",
"[INFO|configuration_utils.py:660] 2023-02-12 23:30:37,341 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 23:30:37,342 >> Model config GPT2Config {\n",
2023-02-12 20:03:40 +01:00
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
2023-02-13 00:49:10 +01:00
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 23:30:38,088 >> loading file vocab.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/vocab.json\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 23:30:38,088 >> loading file merges.txt from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/merges.txt\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 23:30:38,088 >> loading file tokenizer.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/tokenizer.json\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 23:30:38,089 >> loading file added_tokens.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 23:30:38,089 >> loading file special_tokens_map.json from cache at None\n",
"[INFO|tokenization_utils_base.py:1802] 2023-02-12 23:30:38,089 >> loading file tokenizer_config.json from cache at None\n",
"[INFO|configuration_utils.py:660] 2023-02-12 23:30:38,089 >> loading configuration file config.json from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/config.json\n",
"[INFO|configuration_utils.py:712] 2023-02-12 23:30:38,090 >> Model config GPT2Config {\n",
2023-02-12 20:03:40 +01:00
" \"_name_or_path\": \"gpt2\",\n",
" \"activation_function\": \"gelu_new\",\n",
" \"architectures\": [\n",
" \"GPT2LMHeadModel\"\n",
" ],\n",
" \"attn_pdrop\": 0.1,\n",
" \"bos_token_id\": 50256,\n",
" \"embd_pdrop\": 0.1,\n",
" \"eos_token_id\": 50256,\n",
" \"initializer_range\": 0.02,\n",
" \"layer_norm_epsilon\": 1e-05,\n",
" \"model_type\": \"gpt2\",\n",
" \"n_ctx\": 1024,\n",
" \"n_embd\": 768,\n",
" \"n_head\": 12,\n",
" \"n_inner\": null,\n",
" \"n_layer\": 12,\n",
" \"n_positions\": 1024,\n",
" \"reorder_and_upcast_attn\": false,\n",
" \"resid_pdrop\": 0.1,\n",
" \"scale_attn_by_inverse_layer_idx\": false,\n",
" \"scale_attn_weights\": true,\n",
" \"summary_activation\": null,\n",
" \"summary_first_dropout\": 0.1,\n",
" \"summary_proj_to_labels\": true,\n",
" \"summary_type\": \"cls_index\",\n",
" \"summary_use_proj\": true,\n",
" \"task_specific_params\": {\n",
" \"text-generation\": {\n",
" \"do_sample\": true,\n",
" \"max_length\": 50\n",
" }\n",
" },\n",
" \"transformers_version\": \"4.26.1\",\n",
" \"use_cache\": true,\n",
" \"vocab_size\": 50257\n",
"}\n",
"\n",
"INFO:__main__:Using hidden states in model: True\n",
"INFO:__main__:Using implementation from class: GPT2ForSequenceClassificationCustom\n",
2023-02-13 00:49:10 +01:00
"[INFO|modeling_utils.py:2275] 2023-02-12 23:30:38,214 >> loading weights file pytorch_model.bin from cache at .cache_training/models--gpt2/snapshots/e7da7f221d5bf496a48136c0cd264e630fe9fcc8/pytorch_model.bin\n",
"[INFO|modeling_utils.py:2857] 2023-02-12 23:30:43,108 >> All model checkpoint weights were used when initializing GPT2ForSequenceClassificationCustom.\n",
2023-02-12 20:03:40 +01:00
"\n",
2023-02-13 00:49:10 +01:00
"[WARNING|modeling_utils.py:2859] 2023-02-12 23:30:43,108 >> Some weights of GPT2ForSequenceClassificationCustom were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.out_proj.weight', 'score.dense_1_hidden.bias', 'score.dense_3.bias', 'score.dense_1_input.weight', 'score.dense_1_hidden.weight', 'score.dense_3.weight', 'score.dense_1_input.bias', 'score.dense_2.weight', 'score.dense_2.bias']\n",
2023-02-12 20:03:40 +01:00
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
2023-02-13 00:49:10 +01:00
"[ERROR|tokenization_utils_base.py:1042] 2023-02-12 23:30:43,118 >> Using pad_token, but it is not set yet.\n",
2023-02-12 20:03:40 +01:00
"INFO:__main__:Set PAD token to EOS: <|endoftext|>\n",
2023-02-13 00:49:10 +01:00
"Running tokenizer on dataset: 0% 0/16 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-79a9e082059ced07/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-5493d7f118c94c16.arrow\n",
"Running tokenizer on dataset: 100% 16/16 [00:01<00:00, 9.52ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-79a9e082059ced07/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-b591b09c51834ed3.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 10.61ba/s]\n",
"Running tokenizer on dataset: 0% 0/2 [00:00<?, ?ba/s]INFO:datasets.arrow_dataset:Caching processed dataset at /content/.cache_training/json/default-79a9e082059ced07/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51/cache-a7b29ded24225be9.arrow\n",
"Running tokenizer on dataset: 100% 2/2 [00:00<00:00, 10.91ba/s]\n",
"INFO:__main__:Sample 10476 of the training set: {'label': 0, 'text': 'i do find new friends i m going to try extra hard to make them stay and if i decide that i don t want to feel hurt again and just ride out the last year of school on my own i m going to have to try extra hard not to care what people think of me being a loner', 'input_ids': [72, 466, 1064, 649, 2460, 1312, 285, 1016, 284, 1949, 3131, 1327, 284, 787, 606, 2652, 290, 611, 1312, 5409, 326, 1312, 836, 256, 765, 284, 1254, 5938, 757, 290, 655, 6594, 503, 262, 938, 614, 286, 1524, 319, 616, 898, 1312, 285, 1016, 284, 423, 284, 1949, 3131, 1327, 407, 284, 1337, 644, 661, 892, 286, 502, 852, 257, 300, 14491, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 1824 of the training set: {'label': 1, 'text': 'i asked them to join me in creating a world where all year old girls could grow up feeling hopeful and powerful', 'input_ids': [72, 1965, 606, 284, 4654, 502, 287, 4441, 257, 995, 810, 477, 614, 1468, 4813, 714, 1663, 510, 4203, 17836, 290, 3665, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"INFO:__main__:Sample 409 of the training set: {'label': 2, 'text': 'i feel when you are a caring person you attract other caring people into your life', 'input_ids': [72, 1254, 618, 345, 389, 257, 18088, 1048, 345, 4729, 584, 18088, 661, 656, 534, 1204, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256, 50256], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}.\n",
"[INFO|trainer.py:710] 2023-02-12 23:30:52,243 >> The following columns in the training set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
2023-02-12 20:03:40 +01:00
"/usr/local/lib/python3.8/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
2023-02-13 00:49:10 +01:00
"[INFO|trainer.py:1650] 2023-02-12 23:30:52,252 >> ***** Running training *****\n",
"[INFO|trainer.py:1651] 2023-02-12 23:30:52,253 >> Num examples = 16000\n",
"[INFO|trainer.py:1652] 2023-02-12 23:30:52,253 >> Num Epochs = 1\n",
"[INFO|trainer.py:1653] 2023-02-12 23:30:52,253 >> Instantaneous batch size per device = 8\n",
"[INFO|trainer.py:1654] 2023-02-12 23:30:52,253 >> Total train batch size (w. parallel, distributed & accumulation) = 8\n",
"[INFO|trainer.py:1655] 2023-02-12 23:30:52,253 >> Gradient Accumulation steps = 1\n",
"[INFO|trainer.py:1656] 2023-02-12 23:30:52,253 >> Total optimization steps = 2000\n",
"[INFO|trainer.py:1657] 2023-02-12 23:30:52,254 >> Number of trainable parameters = 137425920\n",
"{'loss': 0.9449, 'learning_rate': 3.7500000000000003e-05, 'epoch': 0.25}\n",
" 25% 500/2000 [02:07<06:07, 4.08it/s][INFO|trainer.py:2709] 2023-02-12 23:32:59,613 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-500\n",
"[INFO|configuration_utils.py:453] 2023-02-12 23:32:59,615 >> Configuration saved in out/emotion/gpt2/checkpoint-500/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 23:33:01,554 >> Model weights saved in out/emotion/gpt2/checkpoint-500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 23:33:01,555 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 23:33:01,555 >> Special tokens file saved in out/emotion/gpt2/checkpoint-500/special_tokens_map.json\n",
"{'loss': 0.3705, 'learning_rate': 2.5e-05, 'epoch': 0.5}\n",
" 50% 1000/2000 [04:17<04:09, 4.01it/s][INFO|trainer.py:2709] 2023-02-12 23:35:09,781 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-1000\n",
"[INFO|configuration_utils.py:453] 2023-02-12 23:35:09,783 >> Configuration saved in out/emotion/gpt2/checkpoint-1000/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 23:35:11,881 >> Model weights saved in out/emotion/gpt2/checkpoint-1000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 23:35:11,882 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 23:35:11,882 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1000/special_tokens_map.json\n",
"{'loss': 0.264, 'learning_rate': 1.25e-05, 'epoch': 0.75}\n",
" 75% 1500/2000 [06:27<02:03, 4.06it/s][INFO|trainer.py:2709] 2023-02-12 23:37:20,141 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-1500\n",
"[INFO|configuration_utils.py:453] 2023-02-12 23:37:20,142 >> Configuration saved in out/emotion/gpt2/checkpoint-1500/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 23:37:22,060 >> Model weights saved in out/emotion/gpt2/checkpoint-1500/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 23:37:22,061 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-1500/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 23:37:22,061 >> Special tokens file saved in out/emotion/gpt2/checkpoint-1500/special_tokens_map.json\n",
"{'loss': 0.2223, 'learning_rate': 0.0, 'epoch': 1.0}\n",
"100% 2000/2000 [08:38<00:00, 4.06it/s][INFO|trainer.py:2709] 2023-02-12 23:39:30,550 >> Saving model checkpoint to out/emotion/gpt2/checkpoint-2000\n",
"[INFO|configuration_utils.py:453] 2023-02-12 23:39:30,551 >> Configuration saved in out/emotion/gpt2/checkpoint-2000/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 23:39:32,522 >> Model weights saved in out/emotion/gpt2/checkpoint-2000/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 23:39:32,523 >> tokenizer config file saved in out/emotion/gpt2/checkpoint-2000/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 23:39:32,524 >> Special tokens file saved in out/emotion/gpt2/checkpoint-2000/special_tokens_map.json\n",
"[INFO|trainer.py:1901] 2023-02-12 23:39:36,929 >> \n",
2023-02-12 20:03:40 +01:00
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n",
2023-02-13 00:49:10 +01:00
"{'train_runtime': 524.6759, 'train_samples_per_second': 30.495, 'train_steps_per_second': 3.812, 'train_loss': 0.4504347610473633, 'epoch': 1.0}\n",
"100% 2000/2000 [08:44<00:00, 3.81it/s]\n",
"[INFO|trainer.py:2709] 2023-02-12 23:39:36,932 >> Saving model checkpoint to out/emotion/gpt2\n",
"[INFO|configuration_utils.py:453] 2023-02-12 23:39:36,934 >> Configuration saved in out/emotion/gpt2/config.json\n",
"[INFO|modeling_utils.py:1704] 2023-02-12 23:39:39,121 >> Model weights saved in out/emotion/gpt2/pytorch_model.bin\n",
"[INFO|tokenization_utils_base.py:2160] 2023-02-12 23:39:39,122 >> tokenizer config file saved in out/emotion/gpt2/tokenizer_config.json\n",
"[INFO|tokenization_utils_base.py:2167] 2023-02-12 23:39:39,122 >> Special tokens file saved in out/emotion/gpt2/special_tokens_map.json\n",
2023-02-12 20:03:40 +01:00
"***** train metrics *****\n",
2023-02-13 00:49:10 +01:00
" epoch = 1.0\n",
" train_loss = 0.4504\n",
" train_runtime = 0:08:44.67\n",
2023-02-12 22:17:41 +01:00
" train_samples = 16000\n",
2023-02-13 00:49:10 +01:00
" train_samples_per_second = 30.495\n",
" train_steps_per_second = 3.812\n",
2023-02-12 20:03:40 +01:00
"INFO:__main__:*** Evaluate ***\n",
2023-02-13 00:49:10 +01:00
"[INFO|trainer.py:710] 2023-02-12 23:39:39,296 >> The following columns in the evaluation set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2964] 2023-02-12 23:39:39,300 >> ***** Running Evaluation *****\n",
"[INFO|trainer.py:2966] 2023-02-12 23:39:39,301 >> Num examples = 2000\n",
"[INFO|trainer.py:2969] 2023-02-12 23:39:39,301 >> Batch size = 8\n",
"100% 250/250 [00:16<00:00, 14.71it/s]\n",
2023-02-12 20:03:40 +01:00
"***** eval metrics *****\n",
2023-02-13 00:49:10 +01:00
" epoch = 1.0\n",
" eval_accuracy = 0.9355\n",
" eval_loss = 0.1925\n",
" eval_runtime = 0:00:17.11\n",
2023-02-12 23:36:28 +01:00
" eval_samples = 2000\n",
2023-02-13 00:49:10 +01:00
" eval_samples_per_second = 116.846\n",
" eval_steps_per_second = 14.606\n",
2023-02-12 20:03:40 +01:00
"INFO:__main__:*** Predict ***\n",
2023-02-13 00:49:10 +01:00
"[INFO|trainer.py:710] 2023-02-12 23:39:56,431 >> The following columns in the test set don't have a corresponding argument in `GPT2ForSequenceClassificationCustom.forward` and have been ignored: text. If text are not expected by `GPT2ForSequenceClassificationCustom.forward`, you can safely ignore this message.\n",
"[INFO|trainer.py:2964] 2023-02-12 23:39:56,432 >> ***** Running Prediction *****\n",
"[INFO|trainer.py:2966] 2023-02-12 23:39:56,433 >> Num examples = 2000\n",
"[INFO|trainer.py:2969] 2023-02-12 23:39:56,433 >> Batch size = 8\n",
"100% 250/250 [00:17<00:00, 14.46it/s]\n",
2023-02-12 20:03:40 +01:00
"INFO:__main__:***** Predict results None *****\n",
2023-02-13 00:49:10 +01:00
"[INFO|modelcard.py:449] 2023-02-12 23:40:14,252 >> Dropping the following result as it does not have all the necessary fields:\n",
"{'task': {'name': 'Text Classification', 'type': 'text-classification'}, 'metrics': [{'name': 'Accuracy', 'type': 'accuracy', 'value': 0.9355000257492065}]}\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Save model"
],
"metadata": {
"id": "L55P7rx6nYE2"
}
},
{
"cell_type": "code",
"source": [
"# Copy the training output directory to Google Drive.\n",
"# drive.mount reports 'already mounted' and continues if Drive was mounted earlier.\n",
"drive.mount('/content/drive')\n",
"# Create the destination first: if `models` is missing, `cp -r` copies `emotion` *as* `models`\n",
"# (models/gpt2/...) instead of *into* it (models/emotion/gpt2/...), so the layout would differ between runs.\n",
"!mkdir -p /content/drive/MyDrive/models\n",
"!cp -r /content/out/emotion /content/drive/MyDrive/models"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "QuuflS4qnZiw",
"outputId": "39ad5b6f-9019-49dc-a517-1e224d51a0bb"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
2023-02-10 22:26:16 +01:00
]
2023-02-12 20:03:40 +01:00
}
]
2023-02-10 22:26:16 +01:00
}
]
}