maybe better

This commit is contained in:
Szymon Parafiński 2022-05-17 23:29:32 +02:00
parent eed10e18da
commit d4148c58f5
4 changed files with 45787 additions and 45786 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

7
run.py
View File

@ -12,18 +12,19 @@ Y_train = []
stop = 0
with lzma.open('train/train.tsv.xz', 'rt', encoding="utf-8") as f:
data = pd.read_csv(f, sep='\t', names=['Begin', 'End', 'Title', 'Publisher', 'Text'])
data = pd.read_csv(f, sep='\t', names=['Begin', 'End', 'Text'])
data = data[['Text', 'Begin']]
data = data[['Text', 'End']]
data = data[0:50000]
X = data['Text']
y = data['Begin']
y = data['End']
model = make_pipeline(TfidfVectorizer(), LinearRegression())
model.fit(X, y)
def readFile(filename):
X_dev = []
with open(filename, 'r', encoding="utf-8") as dev_in:

File diff suppressed because it is too large Load Diff