# Load the dev, test and train splits from disk into NumPy arrays.
#
# Results are left in the module-level names ``X_train``, ``y_train``,
# ``X_dev`` and ``X_test``; the dev inputs are printed at the end.
import gzip

import numpy as np
import pandas as pd

# Rows pandas cannot parse are dropped with a warning rather than aborting
# the load.  ``on_bad_lines`` (pandas >= 1.3) replaces the former
# ``error_bad_lines=False`` keyword, which was removed in pandas 2.0.
dev = pd.read_table('dev-0/in.tsv', on_bad_lines='warn', header=None)
test = pd.read_table('test-A/in.tsv', on_bad_lines='warn', header=None)

X_train = []
y_train = []
# Text mode decodes UTF-8 once at the I/O boundary.  ``newline='\n'`` keeps
# lines terminated by "\n" only, so a stray "\r" inside a record does not
# split it into two rows.
with gzip.open('train/train.tsv.gz', 'rt', encoding='utf-8', newline='\n') as f:
    for raw_line in f:
        fields = raw_line.replace("\n", "").split("\t")
        # First tab-separated field goes to y_train, the remaining fields
        # to X_train.
        y_train.append(fields[0])
        X_train.append(fields[1:])

# NOTE(review): if rows carry differing numbers of fields, recent NumPy
# versions refuse to build an array from the ragged list -- confirm the
# train file always has a fixed column count.
X_train = np.asanyarray(X_train)
y_train = np.asanyarray(y_train)

# Only the first column of the dev/test tables is used.
X_dev = dev[0].values
X_test = test[0].values
print(X_dev)