# auta-public/Untitled.py
#
# 218 lines
# 2.5 KiB
# Python
# Raw Permalink Normal View History
#
# 2021-05-16 19:30:35 +02:00
#!/usr/bin/env python
# coding: utf-8
# In[243]:
import numpy as np
import matplotlib
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# In[244]:
#with open('train/train.tsv') as file:
# for line in file.readlines()[:10]:
# print(line)
# In[245]:
#with open('names') as file:
# for line in file.readlines():
# header.append(line.strip())
# In[246]:
#train
# In[247]:
# The column names of the training table are stored as a single
# tab-separated line in the `names` file.
with open('names') as file:
    header = file.read().rstrip('\n').split('\t')

train_path = 'train/train.tsv'
train = pd.read_csv(train_path, sep='\t', names=header)

# Remove the discrete-valued columns; they are not encoded anywhere in this
# script, so only the remaining columns are passed on to the model.
train.drop(columns=['brand', 'engineType'], inplace=True)
# In[ ]:
# In[248]:
# Target: take the 'price' column out of the training frame (this also removes
# it from the inputs) and keep it as a one-column DataFrame.
y_train = train.pop('price').to_frame()

# Inputs: every column that is still left in `train`.
x_train = pd.DataFrame(train)

# Fit a linear regression on the training data.
model = LinearRegression().fit(x_train, y_train)

# Column names assigned, by position, to the dev-0 and test-A input files.
# NOTE(review): the first name is 'price' although these are input files, and
# the model was fitted on the columns left after removing 'price' from the
# `names` header. Recent scikit-learn releases compare DataFrame column names
# between fit and predict - confirm these names match the in.tsv layout.
header = ['price', 'year', 'brand', 'engineType', 'engineCapacity']
# In[249]:
#dev
# In[250]:
# Load the dev-0 inputs using the column names defined in `header`.
dev = pd.read_csv('dev-0/in.tsv', sep='\t', names=header)
# In[251]:
print(dev)
# In[252]:
# Reference values for dev-0: one number per line. Blank lines (for example a
# trailing empty line) are skipped instead of crashing float().
with open('dev-0/expected.tsv', 'r') as file:
    y_dev = np.array([float(line) for line in file if line.strip()])
# In[253]:
# Same column removal as for the training data.
dev.drop(columns=['brand', 'engineType'], inplace=True)
# In[254]:
print(dev)
# In[255]:
x_dev = pd.DataFrame(dev)
predict_dev = model.predict(x_dev)
print(predict_dev)
# In[256]:
# Write the dev-0 predictions once, one value per line, newline-terminated.
# The script used to write this file twice; the second write referenced an
# undefined name (`predict`) and raised NameError before the test-A section
# could run.
pd.DataFrame(predict_dev).to_csv('dev-0/out.tsv', sep='\t', index=False, header=False)
# # RMSE for dev-0
# In[257]:
error = np.sqrt(mean_squared_error(y_dev, predict_dev))
print(error)
# In[258]:
#test
# In[259]:
# In[260]:
# Load the test-A inputs using the column names defined in `header`.
test = pd.read_csv('test-A/in.tsv', sep='\t', names=header)
print(test)
# In[261]:
# Same column removal as for the training and dev-0 data.
test.drop(columns=['brand', 'engineType'], inplace=True)
# NOTE(review): this saves the input column labelled 'price' (with index and
# header row, the to_csv defaults) as test-A/expected.tsv, while the same
# column stays in `x_test` below as a model input - confirm this is intended.
y_expected = pd.DataFrame(test['price'])
y_expected.to_csv('test-A/expected.tsv', sep='\t', encoding='utf-8')
# In[262]:
print(test)
# In[263]:
x_test = pd.DataFrame(test)
predict_test = model.predict(x_test)
# Write the test-A predictions once, one value per line, newline-terminated.
# The file used to be written twice in a row, with the second write
# overwriting the first.
pd.DataFrame(predict_test).to_csv('test-A/out.tsv', sep='\t', index=False, header=False)
# In[264]:
# # RMSE for dev-0
# In[266]:
# Root of the mean squared error between the dev-0 reference values and the
# model's dev-0 predictions.
dev_mse = mean_squared_error(y_dev, predict_dev)
error = np.sqrt(dev_mse)
print(error)
# In[ ]:
# In[ ]:
# In[ ]: