"""Load the dev/test inputs and the gzipped training set into memory.

After this script runs, the following module-level names are populated:

* ``dev``     -- DataFrame read from ``dev-0/in.tsv`` (no header row).
* ``test``    -- DataFrame read from ``test-A/in.tsv`` (no header row).
* ``train_y`` -- list with the first tab-separated field of every training line.
* ``train_X`` -- list with the remaining fields of every training line,
  concatenated without a separator.
"""
import gzip

import pandas as pd


def _read_tsv(path):
    """Read a header-less, tab-separated file, warning on and skipping bad rows.

    ``error_bad_lines`` was deprecated in pandas 1.3 and removed in 2.0, where
    passing it raises ``TypeError``.  ``on_bad_lines="warn"`` reproduces what
    ``error_bad_lines=False`` did on its own (warn, then skip the row).  The
    fallback keeps the script usable on pandas releases that predate
    ``on_bad_lines``.
    """
    try:
        return pd.read_table(path, on_bad_lines="warn", header=None)
    except TypeError:
        # Older pandas does not know ``on_bad_lines``; use the legacy keyword.
        return pd.read_table(path, error_bad_lines=False, header=None)


dev = _read_tsv('dev-0/in.tsv')
test = _read_tsv('test-A/in.tsv')
print(dev)

train_y = []
train_X = []
# Text mode decodes UTF-8 once at the I/O boundary.  ``newline="\n"`` keeps the
# line splitting identical to iterating the binary stream: only "\n" ends a
# line and any "\r" characters are left in the data untouched.
with gzip.open('train/train.tsv.gz', 'rt', encoding='utf-8', newline='\n') as train_file:
    for raw_line in train_file:
        head, *rest = raw_line.rstrip("\n").split("\t")
        train_y.append(head)
        # NOTE(review): the remaining fields are glued together with no
        # separator, so tabs inside the text are dropped -- confirm this is
        # intended rather than ' '.join / '\t'.join.
        train_X.append(''.join(rest))

# Quick sanity peek at the loaded data (note: both slices skip index 0).
print(train_y[1:20])
print(train_X[1:3])