forked from kubapok/auta-public
32 KiB
32 KiB
!pip install scikit-learn==0.24.2 --user
Requirement already satisfied: scikit-learn==0.24.2 in c:\users\ania\appdata\roaming\python\python38\site-packages (0.24.2) Requirement already satisfied: threadpoolctl>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn==0.24.2) (2.1.0) Requirement already satisfied: scipy>=0.19.1 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn==0.24.2) (1.5.2) Requirement already satisfied: joblib>=0.11 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn==0.24.2) (0.17.0) Requirement already satisfied: numpy>=1.13.3 in c:\programdata\anaconda3\lib\site-packages (from scikit-learn==0.24.2) (1.19.2)
import pandas as pd
import numpy as np
from math import sqrt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Print NumPy floats in plain fixed-point notation rather than scientific.
np.set_printoptions(formatter={'float_kind': '{:f}'.format})

# Column names are stored in a separate tab-separated 'names' file and are
# passed to read_csv explicitly. The encoding is given explicitly so the
# result does not depend on the platform default.
with open('names', encoding='utf-8') as f:
    header = f.read().replace('\n', '').split('\t')

# Load the training set and pick the features used by the model.
cars_train = pd.read_csv('train/train.tsv', sep="\t", names=header)
cars_train_X = cars_train[["mileage", "year", "brand", "engineType", "engineCapacity"]]
# One-hot encode the categorical features (produces brand_* / engineType_*
# indicator columns).
cars_train_X = pd.get_dummies(cars_train_X)
cars_train_Y = cars_train["price"]
# Keep the training column layout; the other data sets are aligned to it
# before prediction.
input_columns = cars_train_X.columns
# Train the model and report the error on the training set.
# NOTE(review): positive=True constrains all fitted coefficients to be
# non-negative (per the scikit-learn docs) - confirm this is intended for
# features such as mileage.
model = LinearRegression(positive=True)
model.fit(cars_train_X, cars_train_Y)
predictions = model.predict(cars_train_X)
# RMSE on the training data; mean_squared_error takes (y_true, y_pred).
sqrt(mean_squared_error(cars_train_Y, predictions))
30118.8791272898
# The dev-0 input is read with every column name except the first one;
# the expected values come from a separate file without a header row and
# are flattened to a 1-D array.
cars_dev_0_X = pd.read_csv('dev-0/in.tsv', sep="\t", names=header[1:])
cars_dev_0_Y = pd.read_csv('dev-0/expected.tsv', sep="\t", header=None).to_numpy().flatten('F')

# Prepare the DEV_0 input: one-hot encode, then align to the training
# layout. reindex adds the indicator columns missing from this split
# (filled with 0), drops columns unknown to the model and restores the
# training column order in a single step.
cars_dev_0_X = pd.get_dummies(cars_dev_0_X).reindex(columns=input_columns, fill_value=0)
cars_dev_0_X
mileage | year | engineCapacity | brand_Abarth | brand_Aixam | brand_Alfa | brand_Aston | brand_Audi | brand_Austin | brand_BMW | ... | brand_Uaz | brand_Vauxhall | brand_Volkswagen | brand_Volvo | brand_Warszawa | brand_dla | brand_star | engineType_benzyna | engineType_diesel | engineType_gaz | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 77000 | 2015 | 2000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
1 | 186146 | 2006 | 1498 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
2 | 192000 | 2007 | 2500 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
3 | 220000 | 2003 | 1997 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
4 | 248000 | 2008 | 1900 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | 146000 | 2004 | 1686 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
996 | 19323 | 2015 | 1598 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
997 | 27561 | 2016 | 1598 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
998 | 155000 | 2012 | 1600 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
999 | 31438 | 2015 | 3000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
1000 rows × 96 columns
# Result for DEV_0: predict, save the predictions in fixed-point format,
# and report the RMSE against the expected values.
predictions_dev = model.predict(cars_dev_0_X)
np.savetxt("dev-0/out.tsv", predictions_dev, fmt='%f')
# mean_squared_error takes (y_true, y_pred).
sqrt(mean_squared_error(cars_dev_0_Y, predictions_dev))
33193.54683638966
# The test-A input is read with every column name except the first one.
cars_test_A_X = pd.read_csv('test-A/in.tsv', sep="\t", names=header[1:])

# Adapt the test input: one-hot encode, then align to the training layout.
# reindex adds the indicator columns missing from this split (filled with
# 0), drops columns unknown to the model and restores the training column
# order in a single step.
cars_test_A_X = pd.get_dummies(cars_test_A_X).reindex(columns=input_columns, fill_value=0)
cars_test_A_X
mileage | year | engineCapacity | brand_Abarth | brand_Aixam | brand_Alfa | brand_Aston | brand_Audi | brand_Austin | brand_BMW | ... | brand_Uaz | brand_Vauxhall | brand_Volkswagen | brand_Volvo | brand_Warszawa | brand_dla | brand_star | engineType_benzyna | engineType_diesel | engineType_gaz | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 203000 | 2010 | 1500 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
1 | 39000 | 2008 | 1000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
2 | 190000 | 2005 | 1600 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
3 | 230000 | 2001 | 1598 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
4 | 189000 | 2000 | 1600 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
995 | 465000 | 2005 | 2500 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
996 | 89074 | 2014 | 2000 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
997 | 21711 | 2014 | 1329 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
998 | 144000 | 2014 | 1500 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
999 | 113606 | 2000 | 4000 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
1000 rows × 96 columns
# Predict on the test-A input and save the result in fixed-point format.
test_output_path = "test-A/out.tsv"
predictions_test = model.predict(cars_test_A_X)
np.savetxt(test_output_path, predictions_test, fmt="%f")