"""Linear-regression baseline: train on train/train.tsv, predict dev-0 and test-A.

The script reads header-less TSV files, one-hot encodes columns 3 and 4,
min-max scales every column, fits ``LinearRegression`` with column 0 of the
training file as the target, writes predictions for both evaluation sets and
stores the dev-0 RMSE in ``dev0_rmse.txt``.
"""
import numpy as np
import pandas as pd
from pandas import DataFrame
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Column labels (in the training-file layout) that get one-hot encoded.
CATEGORICAL_COLUMNS = [3, 4]


def _read_tsv(path: str) -> DataFrame:
    """Read a header-less TSV file, warning about and skipping malformed rows."""
    try:
        # Replacement for error_bad_lines=False, which pandas 2.0 removed;
        # "warn" keeps the old warn-and-skip behaviour.
        return pd.read_csv(path, header=None, sep="\t", on_bad_lines="warn")
    except TypeError:
        # pandas < 1.3 does not know on_bad_lines; use the legacy keyword.
        return pd.read_csv(path, header=None, sep="\t", error_bad_lines=False)


def _with_placeholder_target(features: DataFrame) -> DataFrame:
    """Return a copy of *features* laid out like the training frame.

    Feature columns are relabelled 1..k and a zero-filled column labelled 0
    is put in front, so the frame can be stacked under the training data.
    """
    padded = features.copy()
    padded.columns = range(1, padded.shape[1] + 1)
    padded.insert(0, 0, 0)
    return padded


def _write_predictions(path: str, predictions: np.ndarray) -> None:
    """Write the first value of every prediction row to *path*, one per line."""
    with open(path, "w") as out_file:
        out_file.writelines(str(row[0]) + "\n" for row in predictions)


def main() -> None:
    """Run the whole pipeline: load, encode, scale, fit, predict, evaluate."""
    train = _read_tsv("train/train.tsv")
    dev0 = _read_tsv("dev-0/in.tsv")
    test_a = _read_tsv("test-A/in.tsv")
    expected = _read_tsv("dev-0/expected.tsv")

    # Stack all three sets so encoding and scaling see the same categories
    # and value ranges.  pd.concat replaces DataFrame.append (removed in
    # pandas 2.0).
    combined = pd.concat(
        [train, _with_placeholder_target(dev0), _with_placeholder_target(test_a)],
        ignore_index=True,
    )
    # dtype=float: newer pandas yields bool dummies, which do not support the
    # subtraction below; the scaled result is the same 0.0/1.0 either way.
    combined = pd.get_dummies(combined, columns=CATEGORICAL_COLUMNS, dtype=float)

    # Explicit column-wise reductions: np.min(df)/np.max(df) pass axis=None,
    # whose meaning differs between pandas versions.
    col_min = combined.min()
    col_max = combined.max()
    # The target's range is taken over the stacked frame, so the zero
    # placeholders of dev-0/test-A take part in it; denormalisation below
    # uses the same range, so the round trip is consistent.
    scaled = (combined - col_min) / (col_max - col_min)

    n_train, n_dev = len(train), len(dev0)
    train_part = scaled.iloc[:n_train]
    dev_part = scaled.iloc[n_train:n_train + n_dev]
    test_part = scaled.iloc[n_train + n_dev:]

    # Plain arrays go to scikit-learn: after get_dummies the column labels mix
    # ints and strings, which recent scikit-learn versions refuse as feature
    # names.  The target stays 2-D so predictions come back as (n, 1) rows.
    model = LinearRegression().fit(
        train_part.iloc[:, 1:].to_numpy(),
        train_part.iloc[:, :1].to_numpy(),
    )
    predicted_dev0 = model.predict(dev_part.iloc[:, 1:].to_numpy())
    predicted_test_a = model.predict(test_part.iloc[:, 1:].to_numpy())

    # NOTE(review): these files receive predictions in the normalised scale,
    # while the scoring below denormalises first.  Kept as in the original;
    # confirm whether out.tsv is meant to hold denormalised values.
    _write_predictions("dev-0/out.tsv", predicted_dev0)
    _write_predictions("test-A/out.tsv", predicted_test_a)

    # Undo the min-max scaling of the target (first column) in one step.
    target_min = col_min.iloc[0]
    target_max = col_max.iloc[0]
    denormalized_dev0 = predicted_dev0 * (target_max - target_min) + target_min

    # The output file is named *rmse*, so store the root of the MSE.
    rmse = np.sqrt(mean_squared_error(expected.to_numpy(), denormalized_dev0))

    for exp, pred in zip(expected.to_numpy(), denormalized_dev0):
        print(exp, pred)

    with open("dev0_rmse.txt", "w") as rmse_file:
        rmse_file.write(str(rmse))


if __name__ == "__main__":
    main()