diff --git a/run.py b/run.py
index c45d631..76bd61f 100644
--- a/run.py
+++ b/run.py
@@ -7,10 +7,11 @@ train_file = sys.argv[1]
 pred_file = sys.argv[2]
 
 train = pd.read_csv(train_file, sep='\t', header=None)
-pred_x = pd.read_csv(pred_file, sep='\t', header=None)
+# One document per raw line of pred_file; each entry keeps its trailing newline.
+with open(pred_file, encoding='utf-8') as f:
+    pred_x = f.readlines()
 train_x, train_y = train[4], train[0]
 #pred_x = pred[4]
-pred_x = pred_x.stack()
 
 vectorizer = TfidfVectorizer()
 train_x = vectorizer.fit_transform(train_x)