forked from kubapok/auta-public
218 lines
2.5 KiB
Python
218 lines
2.5 KiB
Python
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# In[243]:
|
|
|
|
|
|
import numpy as np
|
|
import matplotlib
|
|
import pandas as pd
|
|
import seaborn as sns
|
|
import matplotlib.pyplot as plt
|
|
from sklearn.linear_model import LinearRegression
|
|
from sklearn.metrics import mean_squared_error
|
|
|
|
|
|
# In[244]:
|
|
|
|
|
|
#with open('train/train.tsv') as file:
|
|
# for line in file.readlines()[:10]:
|
|
# print(line)
|
|
|
|
|
|
# In[245]:
|
|
|
|
|
|
#with open('names') as file:
|
|
# for line in file.readlines():
|
|
# header.append(line.strip())
|
|
|
|
|
|
# In[246]:
|
|
|
|
|
|
#train
|
|
|
|
|
|
# In[247]:
|
|
|
|
|
|
# Column names for the training table are stored tab-separated in `names`.
with open('names') as names_file:
    header = names_file.read().rstrip('\n').split('\t')

train_path = 'train/train.tsv'
train = pd.read_csv(train_path, sep='\t', names=header)

# Drop the discrete-valued columns, one at a time and in place.
for discrete_col in ('brand', 'engineType'):
    train.drop(discrete_col, axis=1, inplace=True)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
# In[248]:
|
|
|
|
|
|
|
|
#output
|
|
# Split off the output: `pop` removes `price` from `train` in place and hands
# it back, so `train` is left holding only the input columns.
y_train = train.pop('price').to_frame()
x_train = pd.DataFrame(train)

model = LinearRegression()
model.fit(x_train, y_train)

# `header` is rebound here; the dev-0 and test-A inputs below are read with
# these column names.
# NOTE(review): the first input column is labelled 'price' although 'price'
# was removed from the training inputs above -- confirm these names line up
# with the columns the model was fitted on.
header = ['price', 'year', 'brand', 'engineType', 'engineCapacity']
|
|
|
|
|
|
# In[249]:
|
|
|
|
|
|
#dev
|
|
|
|
|
|
# In[250]:
|
|
|
|
|
|
# Dev inputs, read with the column names currently held in `header`.
dev = pd.read_csv('dev-0/in.tsv', sep='\t', names=header)
print(dev)

# Reference values for dev-0: each line of the file is parsed as one float.
with open('dev-0/expected.tsv', 'r') as expected_file:
    y_dev = np.array([float(row.rstrip('\n')) for row in expected_file])

# Drop the same discrete columns that were removed from the training data.
for discrete_col in ('brand', 'engineType'):
    dev.drop(discrete_col, axis=1, inplace=True)
print(dev)
|
|
|
|
|
|
# In[255]:
|
|
|
|
|
|
|
|
|
|
x_dev = pd.DataFrame(dev)

# Predict for the dev inputs, show the raw predictions, then dump them to
# dev-0/out.tsv as newline-separated text.
predict_dev = model.predict(x_dev)
print(predict_dev)
predict_dev.tofile('dev-0/out.tsv', sep='\n')
|
|
|
|
|
|
# # RMSE for dev-0
|
|
|
|
# In[257]:
|
|
|
|
|
|
# Square root of the mean squared error between the dev-0 reference values
# and the dev-0 predictions.
dev_mse = mean_squared_error(y_dev, predict_dev)
error = np.sqrt(dev_mse)
print(error)
|
|
|
|
|
|
# In[258]:
|
|
|
|
|
|
#test
|
|
|
|
|
|
# In[259]:
|
|
|
|
|
|
# Write the dev-0 predictions as a TSV without index or header row.
# The original line passed `predict`, a name that is never assigned in this
# script, so it raised NameError; the dev-0 predictions are in `predict_dev`.
pd.DataFrame(predict_dev).to_csv('dev-0/out.tsv', sep='\t', index=False, header=False)
|
|
|
|
|
|
# In[260]:
|
|
|
|
|
|
# Test inputs, read with the column names currently held in `header`.
test = pd.read_csv('test-A/in.tsv', sep='\t', names=header)
print(test)

# Drop the discrete columns, one at a time and in place.
for discrete_col in ('brand', 'engineType'):
    test.drop(discrete_col, axis=1, inplace=True)

# The column read from in.tsv under the name 'price' is written out as
# test-A/expected.tsv; it also stays in `test`.
# NOTE(review): this column comes from the input file -- confirm it really
# holds the expected values before relying on expected.tsv.
y_expected = test['price'].to_frame()
y_expected.to_csv('test-A/expected.tsv', sep='\t', encoding='utf-8')

print(test)
|
|
|
|
|
|
# In[263]:
|
|
|
|
|
|
x_test = pd.DataFrame(test)
predict_test = model.predict(x_test)

# Two writes to the same path, test-A/out.tsv: first a TSV without index or
# header, then the raw array as newline-separated text.
# NOTE(review): the second write presumably replaces the first -- verify
# which format is the one actually wanted.
test_out = pd.DataFrame(predict_test)
test_out.to_csv('test-A/out.tsv', sep='\t', index=False, header=False)
predict_test.tofile('test-A/out.tsv', sep='\n')
|
|
|
|
|
|
# # RMSE for dev-0
|
|
|
|
# In[266]:
|
|
|
|
|
|
# Recompute and print the RMSE from `y_dev` and `predict_dev`, i.e. for the
# dev-0 data (no test-A values are involved here).
squared_error_mean = mean_squared_error(y_dev, predict_dev)
error = np.sqrt(squared_error_mean)
print(error)
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|
|
|
|
# In[ ]:
|
|
|
|
|
|
|
|
|