"""Fit a linear regression on car listings and write price predictions.

Reads training rows from ``train/train.tsv``, fits an ordinary least-squares
model of ``price`` on ``year``, ``mileage`` and ``engineCapacity``, predicts
prices for the rows in ``test-A/in.tsv`` and writes one prediction per line
to ``test-A/out.tsv``.
"""
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn import preprocessing
from sklearn import linear_model
import pandas as pd
import numpy as np

# Column layout of the header-less TSV files; the training file carries the
# target ('price') as an extra leading column.
TEST_COLUMNS = ['mileage', 'year', 'brand', 'engineType', 'engineCapacity']
TRAIN_COLUMNS = ['price'] + TEST_COLUMNS

# Numeric columns used as regressors, in the order the model is fitted on.
FEATURE_COLUMNS = ['year', 'mileage', 'engineCapacity']

# read_csv(names=...) already yields a DataFrame with these columns, so no
# extra pd.DataFrame(...) wrapping is needed.
train = pd.read_csv('train/train.tsv', sep='\t', names=TRAIN_COLUMNS)

reg = linear_model.LinearRegression()
reg.fit(train[FEATURE_COLUMNS], train[['price']])

test = pd.read_csv('test-A/in.tsv', sep='\t', names=TEST_COLUMNS)

# The target was fitted as a single-column frame, so predict() returns an
# (n_rows, 1) array; flatten it to one value per input row.
predictions = np.ravel(reg.predict(test[FEATURE_COLUMNS]))

# Write one prediction per line with explicit formatting.
# np.array2string is avoided because it abbreviates arrays longer than its
# print threshold with "..." and pads/wraps values, which corrupts the output.
# Mode "w" (not "a") so that re-running the script replaces earlier results
# instead of appending a second set of predictions; the context manager
# guarantees the file is flushed and closed.
with open("test-A/out.tsv", "w") as out_file:
    out_file.writelines("{:.5f}\n".format(value) for value in predictions)