forked from kubapok/auta-public
new features
This commit is contained in:
parent
979991c0d8
commit
773c8517d0
1996
dev-0/out.tsv
1996
dev-0/out.tsv
File diff suppressed because it is too large
Load Diff
17
script.py
17
script.py
@ -2,25 +2,32 @@ from sklearn.linear_model import LinearRegression
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
brands = None
|
||||||
|
|
||||||
def get_model():
|
def get_model():
|
||||||
|
global brands
|
||||||
df = pd.read_csv('./train/train.tsv', sep='\t',
|
df = pd.read_csv('./train/train.tsv', sep='\t',
|
||||||
names=["price", "mileage", "year", "brand", "engine_type", "engine_capacity"])
|
names=["price", "mileage", "year", "brand", "engine_type", "engine_capacity"])
|
||||||
X = df.loc[:, df.columns != 'price']
|
X = df.loc[:, df.columns != 'price']
|
||||||
y = df['price']
|
y = df['price']
|
||||||
|
X["age"] = X.year.apply(lambda x: np.sqrt(2017-x))
|
||||||
X = X.drop(["brand"], axis=1)
|
X["sqrt_mileage"] = X.mileage.apply(lambda x: np.sqrt(x))
|
||||||
X = pd.get_dummies(X, columns= ["engine_type"], drop_first=True)
|
brands = X.brand.value_counts()[:35].index.tolist()
|
||||||
|
X.brand = X.brand.apply(lambda x: x if x in brands else "0")
|
||||||
|
X = pd.get_dummies(X)
|
||||||
regr = LinearRegression()
|
regr = LinearRegression()
|
||||||
return regr.fit(X, y)
|
return regr.fit(X, y)
|
||||||
|
|
||||||
|
|
||||||
def predict_and_write(path, model):
|
def predict_and_write(path, model):
|
||||||
|
global brands
|
||||||
with open(f'{path}out.tsv', 'w') as out:
|
with open(f'{path}out.tsv', 'w') as out:
|
||||||
df_dev = pd.read_csv(f'{path}in.tsv', sep='\t',
|
df_dev = pd.read_csv(f'{path}in.tsv', sep='\t',
|
||||||
names=["mileage", "year", "brand", "engine_type", "engine_capacity"])
|
names=["mileage", "year", "brand", "engine_type", "engine_capacity"])
|
||||||
df_dev = df_dev.drop(["brand"], axis=1)
|
df_dev.brand = df_dev.brand.apply(lambda x: x if x in brands else "0")
|
||||||
df_dev = pd.get_dummies(df_dev, columns= ["engine_type"], drop_first=True)
|
df_dev["age"] = df_dev.year.apply(lambda x: np.sqrt(2017-x))
|
||||||
|
df_dev["sqrt_mileage"] = df_dev.mileage.apply(lambda x: np.sqrt(x))
|
||||||
|
df_dev = pd.get_dummies(df_dev)
|
||||||
predictions = model.predict(df_dev).astype(int)
|
predictions = model.predict(df_dev).astype(int)
|
||||||
for prediction in predictions:
|
for prediction in predictions:
|
||||||
out.write(f"{prediction}\n")
|
out.write(f"{prediction}\n")
|
||||||
|
1994
test-A/out.tsv
1994
test-A/out.tsv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user