From 37da463746b5d343353f7cd25076943922088291 Mon Sep 17 00:00:00 2001
From: Maciej Sobkowiak
Date: Mon, 21 Jun 2021 21:10:27 +0200
Subject: [PATCH] Read data

---
 seq.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)
 create mode 100644 seq.py

diff --git a/seq.py b/seq.py
new file mode 100644
index 0000000..c57a458
--- /dev/null
+++ b/seq.py
@@ -0,0 +1,24 @@
+import pandas as pd
+import numpy as np
+import gensim
+import torch
+import pandas as pd
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+
+
+# Training split: tab-separated file read with columns (labels, document).
+train = pd.read_csv('train/train.tsv', sep='\t', names=['labels', 'document'])
+Y_train = train['labels'].values
+X_train = train['document'].values
+
+# Test split: only a 'document' column is read; no labels are loaded for it.
+test = pd.read_csv('test-A/in.tsv', sep='\t', names=['document'])
+X_test = test['document'].values
+
+# Dev split: documents (in.tsv) and labels (expected.tsv) are separate files.
+dev = pd.read_csv('dev-0/in.tsv', sep='\t', names=['document'])
+exp = pd.read_csv('dev-0/expected.tsv', sep='\t', names=['labels'])
+X_dev = dev['document'].values
+# Labels must come from `exp`; `dev` has no 'labels' column (KeyError).
+Y_dev = exp['labels'].values