Compare commits
4 Commits
Author | SHA1 | Date | |
---|---|---|---|
e41231f1ea | |||
7ae168e1cb | |||
773c8517d0 | |||
979991c0d8 |
1000
dev-0/out.tsv
Normal file
1000
dev-0/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
47
script.py
Normal file
47
script.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
from sklearn.linear_model import LinearRegression
|
||||||
|
from sklearn.preprocessing import PolynomialFeatures
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Cache of the brand names kept as distinct categories. It starts empty and is
# filled by process_data() from the first frame it processes (the 35 most
# frequent brands); later calls reuse the same list.
brands = None
|
||||||
|
|
||||||
|
def process_data(df):
    """Turn a raw car-listing frame into the feature matrix used by the model.

    The frame must provide the columns ``year``, ``mileage``,
    ``engine_capacity`` and ``brand``; every remaining non-numeric column is
    one-hot encoded together with ``brand``.

    The function is stateful: the first frame it sees decides both the list of
    retained brands (stored in the module-level ``brands``) and the one-hot
    column layout. Later frames are mapped onto that same layout, so the
    matrix produced for prediction data has the same columns, in the same
    order, as the matrix the model was trained on.

    Args:
        df: pandas DataFrame with the raw columns. It is not modified.

    Returns:
        The array produced by ``PolynomialFeatures(2, interaction_only=True)``
        applied to the engineered, one-hot encoded frame.
    """
    global brands

    # Work on a copy: the caller may pass a slice of a larger frame, and the
    # column assignments below must not leak back into it.
    df = df.copy()

    df["age"] = 2018 - df["year"]
    # NOTE: the columns are called "sqrt_*" but the exponent actually used is
    # 0.7, not 0.5. A negative base (e.g. year > 2018) would yield NaN here.
    df["sqrt_age"] = df.age ** 0.7
    df["sqrt_mileage"] = df.mileage ** 0.7
    df["sqrt_engine_capacity"] = df.engine_capacity ** 0.7

    # Keep only the 35 most frequent brands of the first frame processed;
    # every other brand is collapsed into the placeholder category "0".
    if not brands:
        brands = df.brand.value_counts().index[:35].tolist()
    df["brand"] = df["brand"].where(df["brand"].isin(brands), "0")

    df = pd.get_dummies(df)

    # get_dummies only creates columns for categories present in *this* frame.
    # Remember the layout of the first call and force later frames onto it:
    # missing indicator columns are added as zeros, unseen ones are dropped.
    expected_columns = getattr(process_data, "_feature_columns", None)
    if expected_columns is None:
        expected_columns = df.columns.tolist()
        process_data._feature_columns = expected_columns
    df = df.reindex(columns=expected_columns, fill_value=0)

    poly = PolynomialFeatures(2, interaction_only=True)
    return poly.fit_transform(df)
|
||||||
|
|
||||||
|
def get_model():
    """Train the price regressor on ``./train/train.tsv`` and return it.

    The training file is read as a header-less, tab-separated table whose
    first column is the target ``price``; the remaining columns are passed
    through ``process_data`` before fitting a ``LinearRegression``.
    """
    column_names = ["price", "mileage", "year", "brand", "engine_type", "engine_capacity"]
    train = pd.read_csv('./train/train.tsv', sep='\t', names=column_names)

    target = train['price']
    # Everything except the target column is a raw feature.
    raw_features = train.loc[:, train.columns != 'price']
    features = process_data(raw_features)

    model = LinearRegression()
    model.fit(features, target)
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def predict_and_write(path, model):
    """Predict prices for ``{path}in.tsv`` and write them to ``{path}out.tsv``.

    Args:
        path: Prefix prepended verbatim to ``in.tsv`` / ``out.tsv``; pass a
            directory with a trailing slash (e.g. ``'./dev-0/'``).
        model: Fitted estimator exposing ``predict``.

    The output holds one prediction per line, converted with ``astype(int)``
    (i.e. the fractional part is dropped, not rounded).
    """
    df_dev = pd.read_csv(f'{path}in.tsv', sep='\t',
                         names=["mileage", "year", "brand", "engine_type", "engine_capacity"])
    features = process_data(df_dev)
    predictions = model.predict(features).astype(int)

    # Open the output only once the predictions exist: opening with 'w'
    # truncates the file, so doing it earlier would wipe a previous result
    # whenever reading, feature building or prediction fails.
    with open(f'{path}out.tsv', 'w') as out:
        out.writelines(f"{prediction}\n" for prediction in predictions)
|
||||||
|
|
||||||
|
def main():
    """Train the model once, then write predictions for both evaluation sets."""
    fitted = get_model()
    # Order matters only in that dev-0 is processed before test-A, as before.
    for dataset_dir in ('./dev-0/', './test-A/'):
        predict_and_write(dataset_dir, fitted)
|
||||||
|
|
||||||
|
# Run the full train-and-predict pipeline only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
|
1000
test-A/out.tsv
Normal file
1000
test-A/out.tsv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user