Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
58fa4e4e57 | |||
a8a249287f | |||
916bb37c66 | |||
4b141f3a8c |
1000
dev-0/out.tsv
Normal file
1000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
1
dev0_rmse.txt
Normal file
1
dev0_rmse.txt
Normal file
@ -0,0 +1 @@
|
||||
1165319253.2984157
|
62
main.py
Normal file
62
main.py
Normal file
@ -0,0 +1,62 @@
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.metrics import mean_squared_error
|
||||
import numpy as np
|
||||
|
||||
# Columns shared by every input file; the training file additionally carries
# the target ('price') as its first column.
INPUT_COLUMNS = ['mileage', 'year', 'brand', 'engineType', 'engineCapacity']


def _read_tsv(path, names):
    """Read a headerless tab-separated file into a DataFrame.

    path  -- location of the .tsv file
    names -- column names to assign, in file order

    Malformed rows are skipped rather than raising. on_bad_lines="skip" is
    the replacement for error_bad_lines=False, which was deprecated in
    pandas 1.3 and removed in pandas 2.0.
    """
    return pd.read_csv(path, header=None, sep="\t", on_bad_lines="skip", names=names)


df = _read_tsv("train/train.tsv", ['price'] + INPUT_COLUMNS)
dev0 = _read_tsv("dev-0/in.tsv", INPUT_COLUMNS)
testA = _read_tsv("test-A/in.tsv", INPUT_COLUMNS)
# NOTE(review): a skipped row in dev-0/in.tsv or dev-0/expected.tsv would
# shift predictions relative to the expected values -- confirm both files
# are free of malformed lines.
expected = _read_tsv("dev-0/expected.tsv", ['price'])
|
||||
|
||||
# Keep only the numeric columns used by the model (target first).
df = df[['price', 'year', 'mileage', 'engineCapacity']]

# Per-column extrema for min-max scaling. DataFrame.min()/max() reduce
# column-wise and return a Series indexed by column name. np.min(df) and
# np.max(df) forward axis=None to pandas, which from pandas 2.0 on reduces
# over both axes and yields a single scalar -- that would scale every column
# by the same global range and break the later per-column lookups.
min_val = df.min()
max_val = df.max()

# Min-max scale each column with its own minimum and range.
df = (df - min_val) / (max_val - min_val)
|
||||
|
||||
# Split the scaled training frame into the predictor columns and a
# single-column target frame.
X = df.loc[:, ['year', 'mileage', 'engineCapacity']]
Y = df.loc[:, ['price']]

# Fit a linear regression of the scaled price on the scaled predictors.
model = LinearRegression()
model.fit(X, Y)
|
||||
|
||||
# Scale the dev-0 and test-A inputs with the min/max values computed
# earlier in the script (min_val / max_val).
feature_cols = ['year', 'mileage', 'engineCapacity']
value_range = max_val - min_val
dev0 = (dev0[feature_cols] - min_val) / value_range
testA = (testA[feature_cols] - min_val) / value_range

# The feature columns are selected again before predicting; presumably the
# label-aligned arithmetic above can add columns that exist only in
# min_val / max_val -- verify.
predicted_dev0 = model.predict(dev0[feature_cols])
predicted_testA = model.predict(testA[feature_cols])
|
||||
|
||||
def _denormalize_prices(scaled_predictions):
    """Map min-max-scaled price predictions back to the original price scale.

    scaled_predictions -- iterable of rows as returned by model.predict;
                          the first element of each row is the scaled price.

    Returns a list with one denormalized value per row.
    """
    # Look the target's extrema up by label once, outside the loop.
    # Positional [0] access on a label-indexed Series is deprecated in
    # recent pandas releases.
    price_min = min_val['price']
    price_span = max_val['price'] - price_min
    return [row[0] * price_span + price_min for row in scaled_predictions]


def _write_predictions(path, predictions):
    """Write one prediction per line to the file at path, overwriting it."""
    with open(path, "w") as file:
        file.writelines(str(pred) + "\n" for pred in predictions)


predicted_denormalized = _denormalize_prices(predicted_dev0)
_write_predictions("dev-0/out.tsv", predicted_denormalized)

predicted_denormalizedA = _denormalize_prices(predicted_testA)
_write_predictions("test-A/out.tsv", predicted_denormalizedA)
|
||||
|
||||
|
||||
# Wrap the dev-0 predictions in a one-column frame for the metric call.
predicted_denormalized = DataFrame(predicted_denormalized, columns=['pred'])

# mean_squared_error returns the MSE; take the square root so the value
# stored in dev0_rmse.txt really is the RMSE its name promises.
error = np.sqrt(mean_squared_error(expected, predicted_denormalized))

# Print each expected/predicted pair for manual inspection.
for exp, pred in zip(expected.values, predicted_denormalized.values):
    print(exp, pred)

# The context manager closes the file even if the write fails.
with open("dev0_rmse.txt", "w") as f:
    f.write(str(error))

print(error)
|
1000
test-A/out.tsv
Normal file
1000
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user