forked from kubapok/auta-public
Zadanie
This commit is contained in:
parent
60e7521dcc
commit
53df56f572
217
Untitled.py
Normal file
217
Untitled.py
Normal file
@ -0,0 +1,217 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[243]:
|
||||
|
||||
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.metrics import mean_squared_error
|
||||
|
||||
|
||||
# In[244]:
|
||||
|
||||
|
||||
#with open('train/train.tsv') as file:
|
||||
# for line in file.readlines()[:10]:
|
||||
# print(line)
|
||||
|
||||
|
||||
# In[245]:
|
||||
|
||||
|
||||
#with open('names') as file:
|
||||
# for line in file.readlines():
|
||||
# header.append(line.strip())
|
||||
|
||||
|
||||
# In[246]:
|
||||
|
||||
|
||||
#train
|
||||
|
||||
|
||||
# In[247]:
|
||||
|
||||
|
||||
# Column names for the training file: the 'names' file is read whole,
# trailing newlines are stripped, and the text is split on tabs.
with open('names') as names_file:
    header = names_file.read().rstrip('\n').split('\t')

train_path = 'train/train.tsv'

# The TSV is read with the names loaded above as its column labels.
train = pd.read_csv(train_path, sep='\t', names=header)

# Remove the discrete columns in place; they are dropped before the
# regression is fitted.
train.drop(columns=['brand', 'engineType'], inplace=True)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# In[248]:
|
||||
|
||||
|
||||
|
||||
#output
|
||||
# Target: pop() removes 'price' from `train` in place and returns it as a
# Series; to_frame() turns it into a single-column DataFrame.
y_train = train.pop('price').to_frame()

# Every column still left in `train` is used as a model input.
x_train = pd.DataFrame(train)

model = LinearRegression().fit(x_train, y_train)

# Column names used for the in.tsv files read further down.
# NOTE(review): 'price' was removed from the training inputs above, so an
# input column labelled 'price' here cannot match a fitted feature by name.
# Verify what the first column of in.tsv really is.
header = ['price', 'year', 'brand', 'engineType', 'engineCapacity']
|
||||
|
||||
|
||||
# In[249]:
|
||||
|
||||
|
||||
#dev
|
||||
|
||||
|
||||
# In[250]:
|
||||
|
||||
|
||||
dev = pd.read_csv('dev-0/in.tsv', sep='\t', names=header)


# In[251]:


print(dev)


# In[252]:


# Expected values for dev-0: one float parsed from each line.
with open('dev-0/expected.tsv', 'r') as expected_file:
    y_dev = np.array([float(line) for line in expected_file])


# In[253]:


# Same discrete columns that were dropped from the training frame.
dev.drop(columns=['brand', 'engineType'], inplace=True)


# In[254]:


print(dev)


# In[255]:


x_dev = pd.DataFrame(dev)

predict_dev = model.predict(x_dev)
print(predict_dev)


# In[256]:


# Write the predictions as text, separated by newlines.
predict_dev.tofile('dev-0/out.tsv', sep='\n')


# # RMSE for dev-0

# In[257]:


# Root of the mean squared error between expected and predicted values.
error = np.sqrt(mean_squared_error(y_true=y_dev, y_pred=predict_dev))
print(error)
|
||||
|
||||
|
||||
# In[258]:
|
||||
|
||||
|
||||
#test
|
||||
|
||||
|
||||
# In[259]:
|
||||
|
||||
|
||||
# Save the dev-0 predictions as a TSV without index or header row.
# The predictions are held in `predict_dev`; no variable named `predict`
# is defined in the visible script, so that name raised NameError here.
pd.DataFrame(predict_dev).to_csv('dev-0/out.tsv', sep='\t', index=False, header=False)
|
||||
|
||||
|
||||
# In[260]:
|
||||
|
||||
|
||||
test = pd.read_csv('test-A/in.tsv', sep='\t', names=header)
print(test)


# In[261]:


# Same discrete columns that were dropped from the training frame.
test.drop(columns=['brand', 'engineType'], inplace=True)

# NOTE(review): 'price' is simply the label that `header` gives to the first
# column of in.tsv. Confirm it is a real target before treating
# expected.tsv as ground truth. The column also stays in `test`, so it is
# passed to the model as an input below.
y_expected = test[['price']]
y_expected.to_csv('test-A/expected.tsv', sep='\t', encoding='utf-8')


# In[262]:


print(test)


# In[263]:


x_test = pd.DataFrame(test)

predict_test = model.predict(x_test)
predictions_frame = pd.DataFrame(predict_test)
predictions_frame.to_csv('test-A/out.tsv', sep='\t', index=False, header=False)


# In[264]:


# Writes to the same path again, replacing the file produced just above
# with newline-separated text.
predict_test.tofile('test-A/out.tsv', sep='\n')
|
||||
|
||||
|
||||
# # RMSE for dev-0

# In[266]:


# Computes the dev-0 RMSE again from the same expected and predicted arrays.
error = np.sqrt(mean_squared_error(y_true=y_dev, y_pred=predict_dev))
print(error)
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
|
||||
|
1000
dev-0/out.tsv
Normal file
1000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
1001
test-A/expected.tsv
Normal file
1001
test-A/expected.tsv
Normal file
File diff suppressed because it is too large
Load Diff
1000
test-A/out.tsv
Normal file
1000
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user