"""Load the dev, test and training data for the ``bayes2`` experiment.

Recovered from the patch that created ``bayes2.py``:
commit 9ccc657af6c51324f56d5966dfc767319e2e3445 by Maciej Sobkowiak,
Wed, 12 May 2021 19:58:28 +0200, subject "Working on bayes2".

Run as a script, it reads the two header-less TSV inputs and the gzipped
training file, then prints a few samples for inspection.
"""
import gzip
import inspect

import pandas as pd

DEV_PATH = 'dev-0/in.tsv'
TEST_PATH = 'test-A/in.tsv'
TRAIN_PATH = 'train/train.tsv.gz'

# ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0 in
# favour of ``on_bad_lines``.  Pick whichever keyword the installed pandas
# accepts so malformed rows are skipped instead of aborting the read.
if 'on_bad_lines' in inspect.signature(pd.read_table).parameters:
    _BAD_LINE_KWARGS = {'on_bad_lines': 'warn'}
else:
    _BAD_LINE_KWARGS = {'error_bad_lines': False}


def read_inputs(path):
    """Read a header-less, tab-separated file into a DataFrame.

    Rows with too many fields are dropped rather than raising.

    Args:
        path: File path or file-like object accepted by ``pd.read_table``.

    Returns:
        A ``pandas.DataFrame`` with integer column labels.
    """
    return pd.read_table(path, header=None, **_BAD_LINE_KWARGS)


def split_example(line):
    """Split one training line into ``(label, text)``.

    The line is cut at the first tab only, so any further tabs stay inside
    the text.  Concatenating the remaining fields without a separator would
    fuse the words on either side of a tab.

    Args:
        line: One decoded line, with or without its trailing newline.

    Returns:
        A ``(label, text)`` tuple; ``text`` is ``''`` when the line has no tab.
    """
    label, _, text = line.rstrip('\n').partition('\t')
    return label, text


def read_training(path):
    """Read the gzipped training TSV into parallel label and text lists.

    Args:
        path: Path to a gzip-compressed, UTF-8 encoded file with one
            ``label<TAB>text`` example per line.

    Returns:
        A ``(labels, texts)`` tuple of equally long lists of strings.
    """
    labels = []
    texts = []
    # newline='\n' keeps the line splitting identical to iterating the file
    # in binary mode: only '\n' ends a line and nothing is translated.
    with gzip.open(path, 'rt', encoding='utf-8', newline='\n') as handle:
        for raw_line in handle:
            label, text = split_example(raw_line)
            labels.append(label)
            texts.append(text)
    return labels, texts


if __name__ == '__main__':
    dev = read_inputs(DEV_PATH)
    test = read_inputs(TEST_PATH)

    print(dev)

    train_y, train_X = read_training(TRAIN_PATH)

    print(train_y[1:20])
    print(train_X[1:3])