diff --git a/.ipynb_checkpoints/run_transformer-checkpoint.ipynb b/.ipynb_checkpoints/run_transformer-checkpoint.ipynb new file mode 100644 index 0000000..9200f10 --- /dev/null +++ b/.ipynb_checkpoints/run_transformer-checkpoint.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "promotional-stage", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "import csv\n", + "import lzma" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "gothic-olympus", + "metadata": {}, + "outputs": [], + "source": [ + "x_train = pd.read_table('train/in.tsv', sep='\\t', header=None, quoting=3)\n", + "#x_train = x_train[0:200000]\n", + "#x_train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "respiratory-train", + "metadata": {}, + "outputs": [], + "source": [ + "with open('train/expected.tsv', 'r', encoding='utf8') as file:\n", + " y_train = pd.read_csv(file, sep='\\t', header=None)\n", + "#y_train = y_train[0:200000]\n", + "#y_train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "loving-sewing", + "metadata": {}, + "outputs": [], + "source": [ + "with open('dev-0/in.tsv', 'r', encoding='utf8') as file:\n", + " x_dev = pd.read_csv(file, sep='\\t', header=None)\n", + "#x_dev" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aware-applicant", + "metadata": {}, + "outputs": [], + "source": [ + "with open('test-A/in.tsv', 'r', encoding='utf8') as file:\n", + " x_test = pd.read_csv(file, sep='\\t', header=None)\n", + "#x_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "lovely-density", + "metadata": {}, + "outputs": [], + "source": [ + "https://github.com/facebookresearch/fairseq/issues/2666" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "occasional-banks", + "metadata": {}, + "outputs": [], + "source": [ + "https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/huggingface/hf_gpt2.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "human-portal", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/run.ipynb b/run.ipynb index 9806b40..cba6075 100644 --- a/run.ipynb +++ b/run.ipynb @@ -3,7 +3,7 @@ { "cell_type": "code", "execution_count": 1, - "id": "74100403-147c-42cd-8285-e30778c0fb66", + "id": "retired-freeze", "metadata": {}, "outputs": [], "source": [ @@ -19,7 +19,7 @@ { "cell_type": "code", "execution_count": 2, - "id": "1ec57d97-a852-490e-8da4-d1e4c9676cd6", + "id": "colored-calculation", "metadata": {}, "outputs": [], "source": [ @@ -35,7 +35,7 @@ { "cell_type": "code", "execution_count": 3, - "id": "86fbfb79-76e7-49f5-b722-2827f93cb03f", + "id": "secondary-worse", "metadata": {}, "outputs": [ { @@ -155,7 +155,7 @@ { "cell_type": "code", "execution_count": 4, - "id": "8960c975-f756-4e36-a1ce-e9fd5fdf8fe3", + "id": "royal-roots", "metadata": {}, "outputs": [ { @@ -264,7 +264,7 @@ { "cell_type": "code", "execution_count": 5, - "id": "6b27e6ce-e9fd-41a1-aacf-53a5fde0a7c1", + "id": "protective-hometown", "metadata": {}, "outputs": [ { @@ -384,7 +384,7 @@ { "cell_type": "code", "execution_count": 6, - "id": "99ae526d-9b7c-493f-be4f-f95b1c8f4b81", + "id": "attractive-banana", "metadata": {}, "outputs": [ { @@ -504,7 +504,7 @@ { "cell_type": "code", "execution_count": 7, - "id": "dba17668-971f-47f8-99ce-fc840b5cb74a", + "id": "realistic-television", "metadata": {}, "outputs": [], "source": [ @@ -525,7 +525,7 @@ { "cell_type": "code", "execution_count": 8, - "id": "1a275c1d-75bc-4290-9332-56396d16a0f2", + "id": "prescription-throat", "metadata": {}, "outputs": [], "source": [ @@ -543,7 +543,7 @@ { "cell_type": "code", "execution_count": 9, - "id": "3125d2f2-0da9-45eb-acf1-90293c6d64a3", + "id": "distinguished-french", "metadata": {}, "outputs": [ { @@ -564,7 +564,7 @@ { "cell_type": "code", "execution_count": 10, - "id": "031a3670-3be7-4146-97b4-0dacd4f9ae58", + "id": "shared-divorce", "metadata": {}, "outputs": [ { @@ -592,7 +592,7 @@ { "cell_type": "code", "execution_count": 15, - "id": "b7defd18-e281-4cf6-9941-cee560749677", + "id": "japanese-broad", "metadata": {}, "outputs": [], "source": [ @@ -618,7 +618,7 @@ { "cell_type": "code", "execution_count": 17, - "id": "92c69ddd-fe58-477f-b2c2-06324a983bcc", + "id": "decent-initial", "metadata": {}, "outputs": [ { @@ -657,7 +657,7 @@ { "cell_type": "code", "execution_count": 13, - "id": "caff921c-d0ab-4fce-a17f-6610266b404d", + "id": "guilty-auditor", "metadata": {}, "outputs": [ { @@ -680,7 +680,7 @@ { "cell_type": "code", "execution_count": null, - "id": "73076eb2-810f-4f85-aa3f-05ee884c413b", + "id": "unavailable-morrison", "metadata": {}, "outputs": [], "source": [ @@ -691,7 +691,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5730562a-0200-4c8f-8f73-992fa2b36133", + "id": "polished-france", "metadata": {}, "outputs": [], "source": [ @@ -701,7 +701,7 @@ { "cell_type": "code", "execution_count": null, - "id": "07a09298-204c-4905-90a8-5dcb87877368", + "id": "underlying-lightning", "metadata": {}, "outputs": [], "source": [] @@ -709,7 +709,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -723,7 +723,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/run_transformer.ipynb b/run_transformer.ipynb new file mode 100644 index 0000000..9200f10 --- /dev/null +++ b/run_transformer.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "promotional-stage", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "import csv\n", + "import lzma" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "gothic-olympus", + "metadata": {}, + "outputs": [], + "source": [ + "x_train = pd.read_table('train/in.tsv', sep='\\t', header=None, quoting=3)\n", + "#x_train = x_train[0:200000]\n", + "#x_train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "respiratory-train", + "metadata": {}, + "outputs": [], + "source": [ + "with open('train/expected.tsv', 'r', encoding='utf8') as file:\n", + " y_train = pd.read_csv(file, sep='\\t', header=None)\n", + "#y_train = y_train[0:200000]\n", + "#y_train" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "loving-sewing", + "metadata": {}, + "outputs": [], + "source": [ + "with open('dev-0/in.tsv', 'r', encoding='utf8') as file:\n", + " x_dev = pd.read_csv(file, sep='\\t', header=None)\n", + "#x_dev" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aware-applicant", + "metadata": {}, + "outputs": [], + "source": [ + "with open('test-A/in.tsv', 'r', encoding='utf8') as file:\n", + " x_test = pd.read_csv(file, sep='\\t', header=None)\n", + "#x_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "lovely-density", + "metadata": {}, + "outputs": [], + "source": [ + "https://github.com/facebookresearch/fairseq/issues/2666" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "occasional-banks", + "metadata": {}, + "outputs": [], + "source": [ + "https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/huggingface/hf_gpt2.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "human-portal", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}