sport-text-classification-b.../bayes2.py
2021-05-12 19:58:28 +02:00

19 lines
461 B
Python

import pandas as pd
import gzip
def _read_tsv(path):
    """Load a headerless tab-separated file into a DataFrame.

    Malformed rows (too many fields) are skipped with a warning instead of
    aborting the whole load.
    """
    try:
        # `on_bad_lines` (pandas >= 1.3) supersedes `error_bad_lines`, which
        # was removed in pandas 2.0. 'warn' matches the old behaviour of
        # `error_bad_lines=False` with the default `warn_bad_lines=True`.
        return pd.read_table(path, on_bad_lines='warn', header=None)
    except TypeError as err:
        # Only fall back when the keyword itself is unknown (pandas < 1.3);
        # any other TypeError is a genuine problem and must propagate.
        if 'on_bad_lines' not in str(err):
            raise
        return pd.read_table(path, error_bad_lines=False, header=None)


# Development and test inputs: one document per row, no header line.
dev = _read_tsv('dev-0/in.tsv')
test = _read_tsv('test-A/in.tsv')
print(dev)
# Training labels and texts, filled in file order from the gzipped TSV.
train_y = []
train_X = []
with gzip.open('train/train.tsv.gz', 'r') as train_file:
    for raw_line in train_file:
        # The file is opened in binary mode, so decode each line, drop the
        # newline and split it into tab-separated fields.
        decoded = raw_line.decode('UTF-8')
        fields = decoded.replace("\n", "").split("\t")
        # First field is the label; every remaining field is concatenated
        # (without a separator) into the document text.
        label, *text_fields = fields
        train_y.append(label)
        train_X.append(''.join(text_fields))
# Quick sanity check of what was loaded.
print(train_y[1:20])
print(train_X[1:3])