This commit is contained in:
Adrian 2022-05-17 13:13:22 +02:00
parent 8359c05a74
commit c2b77f7492
5 changed files with 45789 additions and 45788 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 5,
"id": "8f5480f9-fa82-4150-acff-9309fdc43690", "id": "8f5480f9-fa82-4150-acff-9309fdc43690",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -13,7 +13,7 @@
" ('linearregression', LinearRegression())])" " ('linearregression', LinearRegression())])"
] ]
}, },
"execution_count": 1, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -28,6 +28,7 @@
"with open('train/train.tsv', 'r', encoding='utf8') as file:\n", "with open('train/train.tsv', 'r', encoding='utf8') as file:\n",
" train_data = pd.read_csv(file, sep='\\t', names=['Begin', 'End', 'Title', 'Publisher', 'Text'])\n", " train_data = pd.read_csv(file, sep='\\t', names=['Begin', 'End', 'Title', 'Publisher', 'Text'])\n",
"\n", "\n",
"train_data = train_data[:20000] \n",
" \n", " \n",
"X = train_data['Text']\n", "X = train_data['Text']\n",
"Y = train_data['Begin']\n", "Y = train_data['Begin']\n",
@ -39,7 +40,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 6,
"id": "02e89f1c-a2d0-4d41-94a2-aa86b257069d", "id": "02e89f1c-a2d0-4d41-94a2-aa86b257069d",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -61,7 +62,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"id": "b85f5e22-eafb-41ee-aa2c-20c338d42701", "id": "b85f5e22-eafb-41ee-aa2c-20c338d42701",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],

2
run.py
View File

@ -23,7 +23,7 @@ def write_pred(filename, predictions):
# train_data = train_data[:10000] train_data = train_data[:10000]
X = train_data['Text'] X = train_data['Text']
Y = train_data['Begin'] Y = train_data['Begin']

File diff suppressed because it is too large Load Diff