forked from kubapok/auta-public
linear sklearn
This commit is contained in:
parent
5c4bb10ddf
commit
4b141f3a8c
1000
dev-0/out.tsv
Normal file
1000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
1
dev0_rmse.txt
Normal file
1
dev0_rmse.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
1013603084.8408571
|
59
main.py
Normal file
59
main.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
import pandas as pd
|
||||||
|
from pandas import DataFrame
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Load the header-less, tab-separated data sets.
#
# ``error_bad_lines=False`` was deprecated in pandas 1.3 and removed in 2.0.
# ``on_bad_lines="warn"`` is its direct replacement: malformed rows are
# skipped and a warning is emitted for each of them.
# NOTE(review): a skipped row in dev-0/in.tsv or dev-0/expected.tsv would
# misalign predictions and expected values -- confirm the inputs are clean.
_READ_OPTIONS = {"header": None, "sep": "\t", "on_bad_lines": "warn"}

df = pd.read_csv("train/train.tsv", **_READ_OPTIONS)
dev0 = pd.read_csv("dev-0/in.tsv", **_READ_OPTIONS)
testA = pd.read_csv("test-A/in.tsv", **_READ_OPTIONS)
expected = pd.read_csv("dev-0/expected.tsv", **_READ_OPTIONS)
|
||||||
|
|
||||||
|
def _with_placeholder_target(features):
    """Return a copy of *features* laid out like the training frame.

    Every column label is shifted up by one and a zero-filled column
    labelled ``0`` is inserted in front, so the result can be stacked under
    the training data, whose column 0 is later used as the regression target.
    """
    shifted = features.copy()
    shifted.columns = [label + 1 for label in shifted.columns]
    shifted.insert(0, 0, 0)
    return shifted


test_dev0 = _with_placeholder_target(dev0)
test_testA = _with_placeholder_target(testA)

# Stack train, dev-0 and test-A (in that order) so the one-hot encoding below
# produces the same set of columns for all three.  ``DataFrame.append`` was
# removed in pandas 2.0; ``pd.concat`` is the supported equivalent.
all_df = pd.concat([df, test_dev0, test_testA], ignore_index=True)

# One-hot encode columns 3 and 4.  ``dtype=float`` keeps the indicator
# columns numeric: recent pandas defaults to bool, which does not support the
# subtraction performed during min-max scaling.
all_df = pd.get_dummies(all_df, columns=[3, 4], dtype=float)
|
||||||
|
|
||||||
|
# Min-max scale every column (target column 0 included) into [0, 1].
#
# Use the DataFrame reductions directly: they always return one value per
# column, whereas ``np.min(DataFrame)`` reduces over both axes on recent
# pandas and yields a single scalar, breaking the ``min_val[0]`` lookups
# used later for denormalization.
min_val = all_df.min()
max_val = all_df.max()

# A constant column has a zero range; dividing by 1 instead maps it to 0.0
# rather than producing NaN (0 / 0), which the regression cannot handle.
value_range = (max_val - min_val).replace(0, 1)
all_df = (all_df - min_val) / value_range
|
||||||
|
|
||||||
|
# Cut the combined frame back into its three parts; rows were stacked in the
# order train, dev-0, test-A.
train_end = len(df)
dev0_end = train_end + len(dev0)

dummy_df = all_df.iloc[:train_end]
dummy_dev0 = all_df.iloc[train_end:dev0_end]
dummy_testA = all_df.iloc[dev0_end:]

# The first column is the target, everything after it is a feature.  Y is
# kept as a one-column frame so the predictions come back two-dimensional.
X = dummy_df.iloc[:, 1:]
Y = dummy_df.iloc[:, :1]

model = LinearRegression()
model.fit(X, Y)

predicted_dev0 = model.predict(dummy_dev0.iloc[:, 1:])
predicted_testA = model.predict(dummy_testA.iloc[:, 1:])
|
||||||
|
|
||||||
|
# Write one prediction per line for dev-0 and test-A.
#
# The model was trained on a min-max scaled target, so its raw outputs are in
# normalized units.  Map them back to the original target scale before
# writing, so the files are comparable with dev-0/expected.tsv.
target_min = min_val.loc[0]
target_range = max_val.loc[0] - target_min

for out_path, predictions in (
    ("dev-0/out.tsv", predicted_dev0),
    ("test-A/out.tsv", predicted_testA),
):
    with open(out_path, "w") as file:
        file.writelines(
            str(pred[0] * target_range + target_min) + "\n"
            for pred in predictions
        )
|
||||||
|
|
||||||
|
# Evaluate the dev-0 predictions against the expected values.
#
# Predictions are mapped from the normalized scale back to the original
# target units first (inverse of the min-max scaling of column 0).
target_min = min_val.loc[0]
target_range = max_val.loc[0] - target_min
predicted_denormalized = DataFrame(
    predicted_dev0[:, 0] * target_range + target_min, columns=['pred']
)

# The result file is named "rmse", so report the root of the mean squared
# error rather than the MSE itself.
error = np.sqrt(mean_squared_error(expected, predicted_denormalized))

# Print each expected/predicted pair for manual inspection.
for exp, pred in zip(expected.values, predicted_denormalized.values):
    print(exp, pred)

with open("dev0_rmse.txt", "w") as f:
    f.write(str(error))
|
1000
test-A/out.tsv
Normal file
1000
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user