forked from kubapok/auta-public
rozwiazanie
This commit is contained in:
parent
5c4bb10ddf
commit
565e25fd33
1000
dev-0/out.tsv
Normal file
1000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
50
rozwiązanie.py
Normal file
50
rozwiązanie.py
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
import pandas
|
||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
|
||||||
|
# Locations of the labelled training data and of the two unlabelled input
# sets that predictions are produced for.
r_in = './train/train.tsv'
r_ind_ev = './dev-0/in.tsv'
r_ind_test_A = './test-A/in.tsv'

# Column names assigned to the header-less dev-0 and test-A input files.
_INPUT_COLUMNS = ['mileage', 'year', 'brand', 'engineType', 'engineCapacity']


def _read_tsv(path, columns):
    """Load a header-less, tab-separated file, skipping malformed rows.

    Args:
        path: Path of the TSV file to read.
        columns: Column names to assign, in file order.

    Returns:
        A ``pandas.DataFrame`` with one row per well-formed input line.
    """
    try:
        # ``on_bad_lines='warn'`` warns about and skips malformed rows, which
        # is what ``error_bad_lines=False`` did with its default warning.
        return pandas.read_table(path, sep='\t', names=columns,
                                 on_bad_lines='warn')
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines``; use the legacy keyword
        # there (it was deprecated in 1.3 and removed in 2.0).
        return pandas.read_table(path, sep='\t', names=columns,
                                 error_bad_lines=False)


# The training file's column names are stored in a separate one-line,
# tab-separated file.
with open('./names') as f_names:
    names = f_names.read().rstrip('\n').split('\t')

tsv_read = _read_tsv(r_in, names)
tsv_read_dev = _read_tsv(r_ind_ev, _INPUT_COLUMNS)
tsv_read_test_A = _read_tsv(r_ind_test_A, _INPUT_COLUMNS)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Training -------------------------------------------------------------
# One-hot encode the engine type; every remaining text column is replaced by
# integer category codes.
train = pandas.get_dummies(tsv_read, columns=['engineType'])

categorical_cols = train.select_dtypes(include=object).columns.values
# Categories seen in training, per encoded column, so that dev/test values are
# mapped to the *same* integers the model was fitted on.
train_categories = {}
for col in categorical_cols:
    as_category = train[col].astype('category')
    train_categories[col] = as_category.cat.categories
    train[col] = as_category.cat.codes

# Fit only on rows whose price exceeds 1000.
# NOTE(review): presumably filters out implausible listings - confirm.
train = train.loc[(train['price'] > 1000)]

X = train.loc[:, train.columns != 'price']
clf = LinearRegression().fit(X, train['price'])


# --- Prediction -----------------------------------------------------------
def _prepare_features(raw):
    """Encode an unlabelled input frame exactly like the training matrix.

    Args:
        raw: DataFrame as read from an ``in.tsv`` file.

    Returns:
        A DataFrame with the same columns, in the same order, as ``X``.
    """
    frame = pandas.get_dummies(raw, columns=['engineType'])

    # Reuse the training categories; values never seen in training (and
    # missing values) receive the code -1.
    for col, categories in train_categories.items():
        if col in frame.columns:
            frame[col] = pandas.Categorical(frame[col],
                                            categories=categories).codes

    # Any other text column has no training mapping; encode it on its own,
    # as the original script did, so the frame stays fully numeric.
    for col in frame.select_dtypes(include=object).columns.values:
        frame[col] = frame[col].astype('category').cat.codes

    # Align with the training matrix: indicator columns for engine types
    # absent from this file become 0, columns unknown to the model are dropped.
    return frame.reindex(columns=X.columns, fill_value=0)


dev = _prepare_features(tsv_read_dev)
predictions = clf.predict(dev)
predictions.tofile("./dev-0/out.tsv", sep='\n')

test = _prepare_features(tsv_read_test_A)
# Predict on the test-A features (the original predicted on ``dev`` again and
# wrote the dev-0 predictions into the test-A output file).
predictions = clf.predict(test)
predictions.tofile("./test-A/out.tsv", sep='\n')
|
1000
test-A/out.tsv
Normal file
1000
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user