31 lines
1004 B
Python
31 lines
1004 B
Python
from sklearn.linear_model import LinearRegression
|
|
import pandas as pd
|
|
|
|
def prepare_data(file, type):
    """Load a header-less, tab-separated table and make it fully numeric.

    Args:
        file: Path or file-like object handed straight to ``pd.read_csv``.
        type: ``'train'`` one-hot encodes column 4; any other value
            one-hot encodes column 3.

    Returns:
        A DataFrame in which every object-dtype column has been replaced
        by its integer category codes and the selected column has been
        expanded into indicator columns by ``pd.get_dummies``.
    """
    frame = pd.read_csv(file, sep="\t", header=None)

    # Replace each text column with the integer codes of its categories.
    text_columns = frame.select_dtypes(include=object).columns.values
    for name in text_columns:
        frame[name] = frame[name].astype("category").cat.codes

    # The column to expand sits one position further right for 'train'.
    dummy_column = 4 if type == 'train' else 3
    return pd.get_dummies(frame, columns=[dummy_column])
|
|
|
|
# Load the training table; column 0 is used as the regression target below.
data = prepare_data("./train/train.tsv", "train")

# Keep only the rows whose target value is greater than 1000.
data = data.loc[data[0] > 1000]

price = data.iloc[:, 0]
training_data = data.iloc[:, 1:]

# prepare_data() one-hot encodes with pd.get_dummies, which leaves the frame
# with a mix of integer and string column labels.  scikit-learn >= 1.2 raises
# TypeError for frames labelled that way, so the model is fitted (and later
# queried) on plain float arrays instead; the numeric values are unchanged.
clf = LinearRegression().fit(training_data.to_numpy(dtype=float), price)


def _write_predictions(model, features, out_path):
    """Write one prediction per row of *features* to *out_path*.

    Each prediction is truncated to an integer with ``int()`` and written on
    its own line.  Predictions are computed before the file is opened, so a
    failure in ``predict`` does not clobber an existing output file.
    """
    predictions = model.predict(features.to_numpy(dtype=float))
    with open(out_path, 'w') as writer:
        writer.writelines(str(int(result)) + '\n' for result in predictions)


# The dev and test inputs are passed to the model with all their columns.
dev_data = prepare_data('dev-0/in.tsv', "dev")
_write_predictions(clf, dev_data, 'dev-0/out.tsv')

test_data = prepare_data('test-A/in.tsv', "test")
_write_predictions(clf, test_data, 'test-A/out.tsv')