Compare commits

...

2 Commits

Author SHA1 Message Date
pietrzakkuba
a0b6346b07 fix making out files 2021-05-03 11:59:41 +02:00
pietrzakkuba
085e33a1d4 lin reg 2021-05-03 11:54:26 +02:00
3 changed files with 2037 additions and 0 deletions

1000
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

37
linreg.py Normal file
View File

@ -0,0 +1,37 @@
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from math import sqrt
# Training: fit a linear regression of price on the numeric columns.
# Column names are passed explicitly via `names`, so the first line of the
# file is read as data rather than as a header.
train_data = pd.read_csv(
    'train/train.tsv',
    sep='\t',
    names=['price', 'mileage', 'year', 'brand', 'engine_type', 'engine_capacity'],
)
# Only the numeric columns are used as features; 'brand' and 'engine_type'
# are left out of the model.
train_x = train_data[['mileage', 'year', 'engine_capacity']]
# Double brackets keep the target as a one-column DataFrame.
train_y = train_data[['price']]
lin_reg = LinearRegression().fit(train_x, train_y)
# Prediction on dev-0: the input file has the training columns minus 'price'.
dev_data = pd.read_csv(
    'dev-0/in.tsv',
    sep='\t',
    names=['mileage', 'year', 'brand', 'engine_type', 'engine_capacity'],
)
# Select the same feature columns, in the same order, as used for training.
dev_x = dev_data[['mileage', 'year', 'engine_capacity']]
dev_prediction = lin_reg.predict(dev_x)
# Evaluation on dev-0 using RMSE.
# The expected file is read as a headerless table with one value per line.
# NOTE: sep='\n' is rejected by current pandas (ValueError: the line
# terminator cannot be used as the separator), so the file is read with the
# same tab separator as the other TSV inputs; a single-column file parses the
# same way.
dev_y = pd.read_csv('dev-0/expected.tsv', sep='\t', header=None)
# RMSE is the square root of the mean squared error.
dev_rmse = sqrt(mean_squared_error(dev_y, dev_prediction))
print(dev_rmse)
# Prediction on test-A: same column layout as the dev-0 input (no 'price').
test_data = pd.read_csv(
    'test-A/in.tsv',
    sep='\t',
    names=['mileage', 'year', 'brand', 'engine_type', 'engine_capacity'],
)
# Select the same feature columns, in the same order, as used for training.
test_x = test_data[['mileage', 'year', 'engine_capacity']]
test_prediction = lin_reg.predict(test_x)
# Write the predictions to the out files, one value per line.
# Each prediction is indexed with [0], i.e. every row of the prediction
# array is expected to be a one-element sequence (assumes the model was
# fitted on a one-column target -- confirm if the training code changes).
with open('test-A/out.tsv', 'w') as test_output:
    test_output.writelines(
        str(prediction[0]) + '\n' for prediction in test_prediction
    )
with open('dev-0/out.tsv', 'w') as dev_output:
    dev_output.writelines(
        str(prediction[0]) + '\n' for prediction in dev_prediction
    )

1000
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff