diff --git a/linear-regression.py b/linear-regression.py index 243b3d3..bc0f2c7 100644 --- a/linear-regression.py +++ b/linear-regression.py @@ -13,8 +13,8 @@ def linear_regression(): colnames_test = ['data'] train = pd.read_csv("train/train.tsv", names = colnames_train, sep = "\t") - dev_0 = pd.read_csv("dev-0/in.tsv", error_bad_lines = False, header = None, sep = "\t") - dev_1 = pd.read_csv("dev-1/in.tsv", error_bad_lines = False, header = None, sep = "\t") + dev_0 = pd.read_csv("dev-0/in.tsv", error_bad_lines = False, header = None, sep = "\t", quoting=csv.QUOTE_NONE) + dev_1 = pd.read_csv("dev-1/in.tsv", error_bad_lines = False, header = None, sep = "\t", quoting=csv.QUOTE_NONE,) test = pd.read_csv("test-A/in.tsv", names = colnames_test, sep = "\t") # stworzenie instancji TFIDF i regresji liniowej @@ -29,14 +29,14 @@ def linear_regression(): lin_reg.fit(train_vec, date) # predykcja dla dev-0 - evaluate_dev = tf(dev_0['data']) + evaluate_dev = tf.transform(dev_0['data']) prediction_dev = lin_reg.predict(evaluate_dev) - pd.DataFrame(prediction_dev).to_csv('dev-0/out.tsv', sep = "\t", index = False, header = False) + pd.DataFrame(prediction_dev).to_csv('dev-0/out2.tsv', sep = "\t", index = False, header = False) # predykcja dla test-A - evaluate_test = tf(test['data']) + evaluate_test = tf.transform(test['data']) prediction_test = lin_reg.predict(evaluate_test) - pd.DataFrame(prediction_test).to_csv('test-A/out.tsv', sep = "\t", index = False, header = False) + pd.DataFrame(prediction_test).to_csv('test-A/out2.tsv', sep = "\t", index = False, header = False) return None