"""Fit a linear regression on the training TSV and write predictions.

The training file is read without a header row as the columns ``price``,
``mileage``, ``year``, ``brand``, ``engineType`` and ``engineCap``.  The
dev-0 and test-A inputs are read with the same columns minus ``price``.
For each of them the predictions are written to the sibling ``out.tsv``,
one value per line.

NOTE: the model is trained at import time (module-level code below), so
importing this module reads the training file.
"""
import pandas as pd
from sklearn.linear_model import LinearRegression

# Root directory of the data set; every path below is built from it.
DATA_DIR = 'C:/Users/Ufnow/Desktop/Projekt/auta/auta-public'

# Column names of the (header-less) prediction input files.
FEATURE_NAMES = ['mileage', 'year', 'brand', 'engineType', 'engineCap']


def _read_tsv(path, names):
    """Read a header-less, tab-separated file, skipping malformed lines.

    Args:
        path: Location of the file to read.
        names: Column names to assign to the parsed fields.

    Returns:
        A ``DataFrame`` with the given column names.
    """
    try:
        # pandas >= 1.3 spelling; 'warn' skips the bad line and reports it,
        # matching the old ``error_bad_lines=False`` default behaviour.
        return pd.read_table(path, on_bad_lines='warn', header=None,
                             names=names)
    except TypeError:
        # Older pandas does not know ``on_bad_lines``; use the legacy
        # keyword (removed in pandas 2.0) instead.
        return pd.read_table(path, error_bad_lines=False, header=None,
                             names=names)


def proces(data1, columns=None):
    """Turn a raw feature frame into the matrix given to the model.

    ``engineType`` is replaced by one boolean indicator column per distinct
    value, and the ``engineType`` and ``brand`` columns are dropped.

    Args:
        data1: Frame containing at least ``engineType`` and ``brand``.
        columns: Optional column index to align the result to.  Columns
            missing from the result are added and filled with ``False``;
            columns not listed are discarded.  Pass the training frame's
            columns here when preparing prediction input, because the
            indicator columns depend on which engine types occur in the
            file being processed.

    Returns:
        The processed frame (a new object; ``data1`` is not modified).
    """
    indicators = data1['engineType'].str.get_dummies().astype(bool)
    data1 = pd.concat([data1, indicators], axis=1)
    data1 = data1.drop(['engineType', 'brand'], axis=1)
    if columns is not None:
        data1 = data1.reindex(columns=columns, fill_value=False)
    return data1


def _predict_to_file(in_path, out_path):
    """Predict for the rows of ``in_path`` and write them to ``out_path``.

    Uses the module-level ``model`` and aligns the features to the columns
    of ``data1_train`` so the matrix matches what the model was fitted on.
    """
    features = proces(_read_tsv(in_path, FEATURE_NAMES),
                      columns=data1_train.columns)
    predictions = model.predict(features)
    # One value per line, newline-separated text output.
    predictions.tofile(out_path, sep='\n')


def dev():
    """Write predictions for the dev-0 split to ``dev-0/out.tsv``."""
    _predict_to_file(DATA_DIR + '/dev-0/in.tsv',
                     DATA_DIR + '/dev-0/out.tsv')


def testA():
    """Write predictions for the test-A split to ``test-A/out.tsv``."""
    _predict_to_file(DATA_DIR + '/test-A/in.tsv',
                     DATA_DIR + '/test-A/out.tsv')


# --- Training (runs at import time) ---------------------------------------
data = _read_tsv(DATA_DIR + '/train/train.tsv', ['price'] + FEATURE_NAMES)
data2_train = data['price']          # regression target
data1_train = data.iloc[:, 1:]       # every column after ``price``
data1_train = proces(data1_train)

model = LinearRegression()
model.fit(data1_train, data2_train)


def main():
    """Generate the prediction files for both evaluation splits."""
    dev()
    testA()


if __name__ == '__main__':
    main()