#!/usr/bin/env python
# coding: utf-8
"""Fit a linear-regression price model and write dev-0 / test-A predictions.

The script reads the column names from ``names``, loads ``train/train.tsv``,
drops the discrete columns (``brand``, ``engineType``), fits a
``LinearRegression`` on the remaining columns with ``price`` as the target,
then predicts for ``dev-0/in.tsv`` and ``test-A/in.tsv``. Predictions are
written one value per line to ``<split>/out.tsv`` and the RMSE on dev-0 is
printed.
"""

# matplotlib / seaborn are not used below; they are kept from the notebook
# export so that nothing relying on these imports breaks.
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Name of the target column in the training data.
TARGET = 'price'

# Discrete columns that are removed before fitting / predicting.
DISCRETE_COLUMNS = ['brand', 'engineType']

# Labels applied to the header-less in.tsv files.
# NOTE(review): the first label is 'price', yet 'price' is the target that is
# removed from the training features, so this column presumably holds another
# feature -- confirm against the dataset layout. The labels are kept as they
# were so that test-A/expected.tsv keeps its original header.
INPUT_HEADER = ['price', 'year', 'brand', 'engineType', 'engineCapacity']


def load_training_data(names_path: str = 'names',
                       train_path: str = 'train/train.tsv'):
    """Return ``(x_train, y_train)`` read from the training TSV.

    The column names come from the single tab-separated line in *names_path*.
    ``x_train`` holds every column except the target and the discrete ones;
    ``y_train`` is the target column.
    """
    with open(names_path, encoding='utf-8') as file:
        header = file.read().rstrip('\n').split('\t')

    train = pd.read_csv(train_path, sep='\t', names=header)
    y_train = train[TARGET]
    x_train = train.drop(columns=[TARGET] + DISCRETE_COLUMNS)
    return x_train, y_train


def read_input(path: str) -> pd.DataFrame:
    """Read a header-less ``in.tsv`` file, labelling columns with INPUT_HEADER."""
    return pd.read_csv(path, sep='\t', names=INPUT_HEADER)


def to_features(frame: pd.DataFrame, feature_names) -> pd.DataFrame:
    """Drop the discrete columns and relabel the rest with *feature_names*.

    The remaining columns are matched to the training features by position.
    Relabelling is needed because recent scikit-learn versions reject a
    ``predict()`` frame whose column names differ from those seen in ``fit()``.

    Raises:
        ValueError: if the number of remaining columns does not match the
            number of training features.
    """
    features = frame.drop(columns=DISCRETE_COLUMNS)
    if features.shape[1] != len(feature_names):
        raise ValueError(
            f'expected {len(feature_names)} feature columns, '
            f'got {features.shape[1]}'
        )
    features.columns = list(feature_names)
    return features


def read_expected(path: str) -> np.ndarray:
    """Read one float per line from *path*, skipping blank lines."""
    with open(path, 'r', encoding='utf-8') as file:
        return np.array([float(line) for line in file if line.strip()])


def write_predictions(predictions, path: str) -> None:
    """Write *predictions* to *path*, one value per line, no header or index."""
    pd.DataFrame(np.ravel(predictions)).to_csv(
        path, sep='\t', index=False, header=False
    )


def main() -> None:
    """Train the model, then evaluate on dev-0 and predict for test-A."""
    x_train, y_train = load_training_data()
    model = LinearRegression()
    model.fit(x_train, y_train)
    feature_names = list(x_train.columns)

    # --- dev-0 ---
    dev = read_input('dev-0/in.tsv')
    print(dev)
    y_dev = read_expected('dev-0/expected.tsv')

    x_dev = to_features(dev, feature_names)
    print(x_dev)

    predict_dev = model.predict(x_dev)
    print(predict_dev)
    # The original wrote this file twice, the second time from an undefined
    # name (`predict`); a single write of the dev predictions replaces both.
    write_predictions(predict_dev, 'dev-0/out.tsv')

    # RMSE for dev-0
    error = np.sqrt(mean_squared_error(y_dev, predict_dev))
    print(error)

    # --- test-A ---
    test = read_input('test-A/in.tsv')
    print(test)

    # NOTE(review): this stores the first input column (labelled 'price') as
    # test-A/expected.tsv, with index and header. It is kept for backward
    # compatibility, but it is presumably not real target data -- confirm
    # before relying on this file.
    test[[TARGET]].to_csv('test-A/expected.tsv', sep='\t', encoding='utf-8')

    x_test = to_features(test, feature_names)
    print(x_test)

    predict_test = model.predict(x_test)
    write_predictions(predict_test, 'test-A/out.tsv')


if __name__ == '__main__':
    main()