forked from kubapok/auta-public
Linear regression
This commit is contained in:
parent
5c4bb10ddf
commit
c9fdecc24d
1000
dev-0/out.tsv
Normal file
1000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
68
main.py
Normal file
68
main.py
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
from sklearn.metrics import mean_squared_error
|
||||||
|
|
||||||
|
# Column order of train/train.tsv; the first column is the regression target.
names = ['price', 'mileage', 'year', 'brand', 'engineType', 'engineCapacity']
# Columns of the in.tsv files: same layout, without the leading 'price'.
x_names = names[1:]
# Feature columns actually fed to the model ('brand' is left out).
names_without_brand = [column for column in x_names if column != 'brand']
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Fit a linear regression on the training split and score dev/test.

    Trains ``LinearRegression`` on the data returned by ``get_train_data``,
    writes predictions for the dev split to ``dev-0/out.tsv`` and for the
    test split to ``test-A/out.tsv``, then prints the RMSE on the dev split.
    """
    train_x, train_y = get_train_data()

    model = LinearRegression()
    model.fit(train_x, train_y)

    # Every split is one-hot encoded on its own, so a split that is missing
    # (or adds) an engineType value produces a different set/order of dummy
    # columns than the model was fitted on. Align each feature frame to the
    # training columns: absent dummies become 0, unseen ones are dropped.
    feature_columns = train_x.columns

    dev_x, dev_y = get_dev_data()
    dev_x = dev_x.reindex(columns=feature_columns, fill_value=0)

    predicted_dev_y = model.predict(dev_x)
    save_csv(predicted_dev_y, 'dev-0/out.tsv')

    test_x = get_test_data()
    test_x = test_x.reindex(columns=feature_columns, fill_value=0)

    predicted_test_y = model.predict(test_x)
    save_csv(predicted_test_y, 'test-A/out.tsv')

    print(RMSE(dev_y, predicted_dev_y))
|
||||||
|
|
||||||
|
|
||||||
|
def get_train_data():
    """Load the training split from ``train/train.tsv``.

    The file is read as header-less, tab-separated data with the columns
    listed in ``names``.

    Returns:
        A ``(features, target)`` pair: the ``names_without_brand`` columns
        with ``engineType`` one-hot encoded, and the ``price`` column.
    """
    frame = pd.read_csv('train/train.tsv', sep='\t', names=names)

    target = frame['price']
    features = pd.get_dummies(frame[names_without_brand], columns=['engineType'])

    return features, target
|
||||||
|
|
||||||
|
|
||||||
|
def get_dev_data():
    """Load the dev split features and expected prices.

    Features come from ``dev-0/in.tsv`` (header-less, tab-separated, columns
    as in ``x_names``); expected values come from ``dev-0/expected.tsv``,
    one float per line.

    Returns:
        A ``(features, expected)`` pair: the ``names_without_brand`` columns
        with ``engineType`` one-hot encoded, and a list of floats.
    """
    frame = pd.read_csv('dev-0/in.tsv', sep='\t', names=x_names)
    features = pd.get_dummies(frame[names_without_brand], columns=['engineType'])

    with open('dev-0/expected.tsv', 'r') as handle:
        expected = [float(row.strip('\n')) for row in handle]

    return features, expected
|
||||||
|
|
||||||
|
|
||||||
|
def get_test_data():
    """Load the test split features from ``test-A/in.tsv``.

    The file is read as header-less, tab-separated data with the columns
    listed in ``x_names``.

    Returns:
        The ``names_without_brand`` columns with ``engineType`` one-hot
        encoded.
    """
    frame = pd.read_csv('test-A/in.tsv', sep='\t', names=x_names)
    selected = frame[names_without_brand]

    return pd.get_dummies(selected, columns=['engineType'])
|
||||||
|
|
||||||
|
|
||||||
|
def save_csv(data, path):
    """Write ``data`` to ``path`` as tab-separated values.

    ``data`` is wrapped in a ``DataFrame`` and written without an index
    column and without a header row.
    """
    pd.DataFrame(data).to_csv(path, sep='\t', index=False, header=False)
|
||||||
|
|
||||||
|
|
||||||
|
def RMSE(dev_y, predicted_dev_y):
    """Return the root of the mean squared error between the two sequences.

    Args:
        dev_y: Expected values.
        predicted_dev_y: Predicted values, passed to ``mean_squared_error``
            together with ``dev_y``.
    """
    squared_error = mean_squared_error(dev_y, predicted_dev_y)
    return np.sqrt(squared_error)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the pipeline only when this file is executed as a script.
if '__main__' == __name__:
    main()
|
1000
test-A/out.tsv
Normal file
1000
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user