Compare commits

...

4 Commits

Author SHA1 Message Date
e41231f1ea magic 2021-05-12 11:42:17 +02:00
7ae168e1cb poly 2021-05-12 10:55:01 +02:00
773c8517d0 new features 2021-05-12 09:11:08 +02:00
979991c0d8 linear regr 2021-05-09 21:26:15 +02:00
4 changed files with 2047 additions and 0 deletions

1000
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

BIN
geval Executable file

Binary file not shown.

47
script.py Normal file
View File

@ -0,0 +1,47 @@
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd
import numpy as np
# Preprocessing state learned on the first call to process_data() and reused
# on later calls so every dataset is encoded into the same feature layout.
brands = None
_feature_columns = None


def process_data(df):
    """Build the polynomial feature matrix for a frame of car listings.

    The first call "fits" the preprocessing: it records the 35 most frequent
    brands and the column layout produced by one-hot encoding.  Later calls
    reuse both, so their output has exactly the same columns in the same
    order as the first call's output.

    Args:
        df: DataFrame with at least the columns ``year``, ``mileage``,
            ``engine_capacity`` and ``brand``.  It is not modified.

    Returns:
        The array returned by ``PolynomialFeatures(2, interaction_only=True)``
        applied to the encoded frame.
    """
    global brands, _feature_columns

    # Work on a copy: callers may pass a slice of a larger frame, and adding
    # columns in place would mutate (or warn about) their data.
    df = df.copy()

    df["age"] = 2018 - df["year"]
    # NOTE: the columns are named "sqrt_*" but the exponent is 0.7, not 0.5.
    df["sqrt_age"] = df["age"] ** 0.7
    df["sqrt_mileage"] = df["mileage"] ** 0.7
    df["sqrt_engine_capacity"] = df["engine_capacity"] ** 0.7

    if not brands:
        brands = df["brand"].value_counts()[:35].index.tolist()
        # The column layout depends on the brand list, so refit it as well.
        _feature_columns = None

    # Collapse every brand outside the remembered list into the bucket "0".
    df["brand"] = df["brand"].where(df["brand"].isin(brands), "0")

    df = pd.get_dummies(df)
    if _feature_columns is None:
        _feature_columns = df.columns.tolist()
    else:
        # A later dataset may lack some categories seen on the first call, or
        # contain new ones.  Align to the remembered layout: missing dummy
        # columns become all-zero, unseen ones are dropped.
        df = df.reindex(columns=_feature_columns, fill_value=0)

    poly = PolynomialFeatures(2, interaction_only=True)
    return poly.fit_transform(df)
def get_model():
    """Fit a linear regression on the training file and return it.

    Reads ``./train/train.tsv`` as tab-separated data with explicitly
    supplied column names, uses ``price`` as the target and every other
    column (after ``process_data``) as the features.

    Returns:
        The fitted ``LinearRegression`` instance.
    """
    column_names = ["price", "mileage", "year", "brand", "engine_type", "engine_capacity"]
    train = pd.read_csv('./train/train.tsv', sep='\t', names=column_names)

    target = train['price']
    # Everything except the target column is a raw feature.
    is_feature = train.columns != 'price'
    features = process_data(train.loc[:, is_feature])

    model = LinearRegression()
    model.fit(features, target)
    return model
def predict_and_write(path, model):
    """Predict prices for ``<path>in.tsv`` and write them to ``<path>out.tsv``.

    Args:
        path: Prefix that is concatenated directly with ``in.tsv`` and
            ``out.tsv``; for a directory it must end with a path separator
            (e.g. ``'./dev-0/'``).
        model: Object with a ``predict`` method accepting the matrix returned
            by ``process_data``.

    The output holds one integer per line.  Predictions are converted with
    ``astype(int)``, which truncates toward zero rather than rounding.
    """
    df_dev = pd.read_csv(f'{path}in.tsv', sep='\t',
                         names=["mileage", "year", "brand", "engine_type", "engine_capacity"])
    features = process_data(df_dev)
    predictions = model.predict(features).astype(int)

    # Open the output only after the predictions exist: opening with 'w'
    # truncates the file, so doing it first would wipe earlier results and
    # leave an empty file behind if reading or predicting failed.
    with open(f'{path}out.tsv', 'w') as out:
        out.writelines(f"{prediction}\n" for prediction in predictions)
def main():
    """Train the model once and write predictions for both data splits."""
    fitted = get_model()
    for split_dir in ('./dev-0/', './test-A/'):
        predict_and_write(split_dir, fitted)


# Run the pipeline only when the file is executed as a script.
if __name__ == '__main__':
    main()

1000
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff