"""Predict car prices with a linear regression on three numeric features.

Reads training rows from ``train/train.tsv``, fits a scikit-learn
``LinearRegression`` of ``price`` on ``year``, ``mileage`` and
``engineCapacity``, predicts a price for every row of ``test-A/in.tsv`` and
writes one prediction per line to ``test-A/out.tsv``.
"""
import csv

import numpy as np
import pandas as pd
from scipy.sparse import data
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline

# NOTE(review): csv, data, preprocessing, TfidfVectorizer and make_pipeline
# are not used by the statements below; they are kept only so that no
# previously imported name disappears from the module namespace.

# Column layout of the headerless TSV inputs; the training file carries the
# target ('price') as an extra leading column.
TEST_COLUMNS = ['mileage', 'year', 'brand', 'engineType', 'engineCapacity']
TRAIN_COLUMNS = ['price'] + TEST_COLUMNS
# Numeric columns used as model inputs. The same order is used for fitting
# and for predicting so the feature columns line up.
FEATURE_COLUMNS = ['year', 'mileage', 'engineCapacity']

regression = linear_model.LinearRegression()

# read_csv already returns a DataFrame, so no extra DataFrame wrapping is needed.
train_data_frame = pd.read_csv(
    'train/train.tsv', delimiter='\t', names=TRAIN_COLUMNS)
Y = train_data_frame[['price']]
X = train_data_frame[FEATURE_COLUMNS]
regression.fit(X, Y)

in_data_frame = pd.read_csv(
    'test-A/in.tsv', delimiter='\t', names=TEST_COLUMNS)

# The target was passed as a one-column frame, so predict() yields a column
# of shape (n_rows, 1); flatten it to one value per input row.
y_predict = regression.predict(in_data_frame[FEATURE_COLUMNS]).ravel()

# Format every value individually instead of going through np.array2string:
# array2string abbreviates arrays longer than its default threshold (1000
# elements) with "...", which silently drops predictions, and it pads values
# with spaces that then had to be stripped in a second pass over the file.
# The arguments below mirror array2string's default per-element float
# formatting (at most 8 fractional digits, shortest unique representation,
# trailing zeros trimmed but the decimal point kept).
labels = [
    np.format_float_positional(
        value, precision=8, unique=True, fractional=True, trim='.')
    for value in y_predict
]

# Single write inside a context manager so the handle is flushed and closed.
# As before, values are newline-separated with no trailing newline.
with open("test-A/out.tsv", 'w') as file_out:
    file_out.write('\n'.join(labels))