This commit is contained in:
Adrian 2022-05-17 13:13:22 +02:00
parent 8359c05a74
commit c2b77f7492
5 changed files with 45789 additions and 45788 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 5,
"id": "8f5480f9-fa82-4150-acff-9309fdc43690",
"metadata": {},
"outputs": [
@ -13,7 +13,7 @@
" ('linearregression', LinearRegression())])"
]
},
"execution_count": 1,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@ -28,6 +28,7 @@
"with open('train/train.tsv', 'r', encoding='utf8') as file:\n",
" train_data = pd.read_csv(file, sep='\\t', names=['Begin', 'End', 'Title', 'Publisher', 'Text'])\n",
"\n",
"train_data = train_data[:20000] \n",
" \n",
"X = train_data['Text']\n",
"Y = train_data['Begin']\n",
@ -39,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"id": "02e89f1c-a2d0-4d41-94a2-aa86b257069d",
"metadata": {},
"outputs": [],
@ -61,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "b85f5e22-eafb-41ee-aa2c-20c338d42701",
"metadata": {},
"outputs": [],

2
run.py
View File

@ -23,7 +23,7 @@ def write_pred(filename, predictions):
# train_data = train_data[:10000]
train_data = train_data[:10000]
X = train_data['Text']
Y = train_data['Begin']

File diff suppressed because it is too large Load Diff