This commit is contained in:
Adrian 2022-05-17 12:56:52 +02:00
parent eeb51fec1d
commit 3a326579b5
2 changed files with 3 additions and 12 deletions

View File

@ -2,17 +2,10 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 99, "execution_count": 1,
"id": "8f5480f9-fa82-4150-acff-9309fdc43690", "id": "8f5480f9-fa82-4150-acff-9309fdc43690",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"107463\n"
]
},
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
@ -20,7 +13,7 @@
" ('linearregression', LinearRegression())])" " ('linearregression', LinearRegression())])"
] ]
}, },
"execution_count": 99, "execution_count": 1,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -35,8 +28,6 @@
"with open('train/train.tsv', 'r', encoding='utf8') as file:\n", "with open('train/train.tsv', 'r', encoding='utf8') as file:\n",
" train_data = pd.read_csv(file, sep='\\t', names=['Begin', 'End', 'Title', 'Publisher', 'Text'])\n", " train_data = pd.read_csv(file, sep='\\t', names=['Begin', 'End', 'Title', 'Publisher', 'Text'])\n",
"\n", "\n",
"print(len(train_data)) \n",
"train_data = train_data[:10000]\n",
" \n", " \n",
"X = train_data['Text']\n", "X = train_data['Text']\n",
"Y = train_data['Begin']\n", "Y = train_data['Begin']\n",

2
run.py
View File

@ -23,7 +23,7 @@ def write_pred(filename, predictions):
train_data = train_data[:10000] # train_data = train_data[:10000]
X = train_data['Text'] X = train_data['Text']
Y = train_data['Begin'] Y = train_data['Begin']