This commit is contained in:
Kornelia Girejko 2022-06-15 11:32:08 +02:00
parent f140a121a2
commit db662285c4
3 changed files with 250 additions and 18 deletions

View File

@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "promotional-stage",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import torch\n",
"import csv\n",
"import lzma"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "gothic-olympus",
"metadata": {},
"outputs": [],
"source": [
"# Read train/in.tsv: tab-separated, no header row, and quote characters are\n",
"# kept as ordinary text (csv.QUOTE_NONE is the named form of quoting=3).\n",
"x_train = pd.read_table(\n",
"    'train/in.tsv',\n",
"    sep='\\t',\n",
"    header=None,\n",
"    quoting=csv.QUOTE_NONE,\n",
")\n",
"# Optional: uncomment to keep only the first 200000 rows.\n",
"# x_train = x_train[0:200000]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "respiratory-train",
"metadata": {},
"outputs": [],
"source": [
"# Read train/expected.tsv (tab-separated, no header row) as UTF-8 text.\n",
"with open('train/expected.tsv', encoding='utf8') as expected_file:\n",
"    y_train = pd.read_csv(\n",
"        expected_file,\n",
"        sep='\\t',\n",
"        header=None,\n",
"    )\n",
"# Optional: uncomment to keep only the first 200000 rows.\n",
"# y_train = y_train[0:200000]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "loving-sewing",
"metadata": {},
"outputs": [],
"source": [
"# Parse dev-0/in.tsv with the same quoting rule used for train/in.tsv\n",
"# (quoting=3, i.e. csv.QUOTE_NONE) so that quote characters in the text\n",
"# are kept literally and cannot merge fields or rows.\n",
"with open('dev-0/in.tsv', 'r', encoding='utf8') as file:\n",
"    x_dev = pd.read_csv(file, sep='\\t', header=None, quoting=csv.QUOTE_NONE)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aware-applicant",
"metadata": {},
"outputs": [],
"source": [
"# Parse test-A/in.tsv with the same quoting rule used for train/in.tsv\n",
"# (quoting=3, i.e. csv.QUOTE_NONE) so that quote characters in the text\n",
"# are kept literally and cannot merge fields or rows.\n",
"with open('test-A/in.tsv', 'r', encoding='utf8') as file:\n",
"    x_test = pd.read_csv(file, sep='\\t', header=None, quoting=csv.QUOTE_NONE)"
]
},
{
"cell_type": "markdown",
"id": "lovely-density",
"metadata": {},
"source": [
"Reference (fairseq issue 2666): <https://github.com/facebookresearch/fairseq/issues/2666>"
]
},
{
"cell_type": "markdown",
"id": "occasional-banks",
"metadata": {},
"source": [
"Reference (fairseq `hf_gpt2.py` source): <https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/huggingface/hf_gpt2.py>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "human-portal",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -3,7 +3,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "74100403-147c-42cd-8285-e30778c0fb66",
"id": "retired-freeze",
"metadata": {},
"outputs": [],
"source": [
@ -19,7 +19,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "1ec57d97-a852-490e-8da4-d1e4c9676cd6",
"id": "colored-calculation",
"metadata": {},
"outputs": [],
"source": [
@ -35,7 +35,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "86fbfb79-76e7-49f5-b722-2827f93cb03f",
"id": "secondary-worse",
"metadata": {},
"outputs": [
{
@ -155,7 +155,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "8960c975-f756-4e36-a1ce-e9fd5fdf8fe3",
"id": "royal-roots",
"metadata": {},
"outputs": [
{
@ -264,7 +264,7 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "6b27e6ce-e9fd-41a1-aacf-53a5fde0a7c1",
"id": "protective-hometown",
"metadata": {},
"outputs": [
{
@ -384,7 +384,7 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "99ae526d-9b7c-493f-be4f-f95b1c8f4b81",
"id": "attractive-banana",
"metadata": {},
"outputs": [
{
@ -504,7 +504,7 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "dba17668-971f-47f8-99ce-fc840b5cb74a",
"id": "realistic-television",
"metadata": {},
"outputs": [],
"source": [
@ -525,7 +525,7 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "1a275c1d-75bc-4290-9332-56396d16a0f2",
"id": "prescription-throat",
"metadata": {},
"outputs": [],
"source": [
@ -543,7 +543,7 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "3125d2f2-0da9-45eb-acf1-90293c6d64a3",
"id": "distinguished-french",
"metadata": {},
"outputs": [
{
@ -564,7 +564,7 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "031a3670-3be7-4146-97b4-0dacd4f9ae58",
"id": "shared-divorce",
"metadata": {},
"outputs": [
{
@ -592,7 +592,7 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "b7defd18-e281-4cf6-9941-cee560749677",
"id": "japanese-broad",
"metadata": {},
"outputs": [],
"source": [
@ -618,7 +618,7 @@
{
"cell_type": "code",
"execution_count": 17,
"id": "92c69ddd-fe58-477f-b2c2-06324a983bcc",
"id": "decent-initial",
"metadata": {},
"outputs": [
{
@ -657,7 +657,7 @@
{
"cell_type": "code",
"execution_count": 13,
"id": "caff921c-d0ab-4fce-a17f-6610266b404d",
"id": "guilty-auditor",
"metadata": {},
"outputs": [
{
@ -680,7 +680,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "73076eb2-810f-4f85-aa3f-05ee884c413b",
"id": "unavailable-morrison",
"metadata": {},
"outputs": [],
"source": [
@ -691,7 +691,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "5730562a-0200-4c8f-8f73-992fa2b36133",
"id": "polished-france",
"metadata": {},
"outputs": [],
"source": [
@ -701,7 +701,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "07a09298-204c-4905-90a8-5dcb87877368",
"id": "underlying-lightning",
"metadata": {},
"outputs": [],
"source": []
@ -709,7 +709,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
@ -723,7 +723,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.7.3"
}
},
"nbformat": 4,

116
run_transformer.ipynb Normal file
View File

@ -0,0 +1,116 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "promotional-stage",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import torch\n",
"import csv\n",
"import lzma"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "gothic-olympus",
"metadata": {},
"outputs": [],
"source": [
"# Read train/in.tsv: tab-separated, no header row, and quote characters are\n",
"# kept as ordinary text (csv.QUOTE_NONE is the named form of quoting=3).\n",
"x_train = pd.read_table(\n",
"    'train/in.tsv',\n",
"    sep='\\t',\n",
"    header=None,\n",
"    quoting=csv.QUOTE_NONE,\n",
")\n",
"# Optional: uncomment to keep only the first 200000 rows.\n",
"# x_train = x_train[0:200000]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "respiratory-train",
"metadata": {},
"outputs": [],
"source": [
"# Read train/expected.tsv (tab-separated, no header row) as UTF-8 text.\n",
"with open('train/expected.tsv', encoding='utf8') as expected_file:\n",
"    y_train = pd.read_csv(\n",
"        expected_file,\n",
"        sep='\\t',\n",
"        header=None,\n",
"    )\n",
"# Optional: uncomment to keep only the first 200000 rows.\n",
"# y_train = y_train[0:200000]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "loving-sewing",
"metadata": {},
"outputs": [],
"source": [
"# Parse dev-0/in.tsv with the same quoting rule used for train/in.tsv\n",
"# (quoting=3, i.e. csv.QUOTE_NONE) so that quote characters in the text\n",
"# are kept literally and cannot merge fields or rows.\n",
"with open('dev-0/in.tsv', 'r', encoding='utf8') as file:\n",
"    x_dev = pd.read_csv(file, sep='\\t', header=None, quoting=csv.QUOTE_NONE)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aware-applicant",
"metadata": {},
"outputs": [],
"source": [
"# Parse test-A/in.tsv with the same quoting rule used for train/in.tsv\n",
"# (quoting=3, i.e. csv.QUOTE_NONE) so that quote characters in the text\n",
"# are kept literally and cannot merge fields or rows.\n",
"with open('test-A/in.tsv', 'r', encoding='utf8') as file:\n",
"    x_test = pd.read_csv(file, sep='\\t', header=None, quoting=csv.QUOTE_NONE)"
]
},
{
"cell_type": "markdown",
"id": "lovely-density",
"metadata": {},
"source": [
"Reference (fairseq issue 2666): <https://github.com/facebookresearch/fairseq/issues/2666>"
]
},
{
"cell_type": "markdown",
"id": "occasional-banks",
"metadata": {},
"source": [
"Reference (fairseq `hf_gpt2.py` source): <https://github.com/facebookresearch/fairseq/blob/main/fairseq/models/huggingface/hf_gpt2.py>"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "human-portal",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}