Rozpoznawanie ceny aut

This commit is contained in:
Anna Nowak 2021-05-18 23:40:47 +02:00
parent 5c4bb10ddf
commit 6388633ffe
5 changed files with 3937 additions and 0 deletions

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
*~
*.swp
*.pyc
geval
.ipynb_checkpoints/*

935
cars price prediction.ipynb Normal file
View File

@ -0,0 +1,935 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: scikit-learn==0.24.2 in c:\\users\\ania\\appdata\\roaming\\python\\python38\\site-packages (0.24.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (2.1.0)\n",
"Requirement already satisfied: scipy>=0.19.1 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (1.5.2)\n",
"Requirement already satisfied: joblib>=0.11 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (0.17.0)\n",
"Requirement already satisfied: numpy>=1.13.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (1.19.2)\n"
]
}
],
"source": [
"!pip install scikit-learn==0.24.2 --user"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from math import sqrt\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error\n",
"np.set_printoptions(formatter={'float_kind':'{:f}'.format})"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"header = None\n",
"with open('names') as f:\n",
" header = f.read().replace('\\n', '').split('\\t')\n",
"cars_train = pd.read_csv('train/train.tsv', sep=\"\\t\", names=header)\n",
"cars_train_X = cars_train[[\"mileage\", \"year\", \"brand\", \"engineType\", \"engineCapacity\"]]\n",
"cars_train_X = pd.get_dummies(cars_train_X)\n",
"cars_train_Y = cars_train[\"price\"]\n",
"input_columns = cars_train_X.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30118.8791272898"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Trenowanie modelu i błąd na train\n",
"model = LinearRegression(positive=True)\n",
"model.fit(cars_train_X, cars_train_Y)\n",
"predictions = model.predict(cars_train_X)\n",
"sqrt(mean_squared_error(predictions, cars_train_Y))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"cars_dev_0_X = pd.read_csv('dev-0/in.tsv', sep=\"\\t\", names=header[1:])\n",
"cars_dev_0_Y = pd.read_csv('dev-0/expected.tsv', sep=\"\\t\", header=None).to_numpy().flatten('F')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mileage</th>\n",
" <th>year</th>\n",
" <th>engineCapacity</th>\n",
" <th>brand_Abarth</th>\n",
" <th>brand_Aixam</th>\n",
" <th>brand_Alfa</th>\n",
" <th>brand_Aston</th>\n",
" <th>brand_Audi</th>\n",
" <th>brand_Austin</th>\n",
" <th>brand_BMW</th>\n",
" <th>...</th>\n",
" <th>brand_Uaz</th>\n",
" <th>brand_Vauxhall</th>\n",
" <th>brand_Volkswagen</th>\n",
" <th>brand_Volvo</th>\n",
" <th>brand_Warszawa</th>\n",
" <th>brand_dla</th>\n",
" <th>brand_star</th>\n",
" <th>engineType_benzyna</th>\n",
" <th>engineType_diesel</th>\n",
" <th>engineType_gaz</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>77000</td>\n",
" <td>2015</td>\n",
" <td>2000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>186146</td>\n",
" <td>2006</td>\n",
" <td>1498</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>192000</td>\n",
" <td>2007</td>\n",
" <td>2500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>220000</td>\n",
" <td>2003</td>\n",
" <td>1997</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>248000</td>\n",
" <td>2008</td>\n",
" <td>1900</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>146000</td>\n",
" <td>2004</td>\n",
" <td>1686</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>19323</td>\n",
" <td>2015</td>\n",
" <td>1598</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>27561</td>\n",
" <td>2016</td>\n",
" <td>1598</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>155000</td>\n",
" <td>2012</td>\n",
" <td>1600</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>31438</td>\n",
" <td>2015</td>\n",
" <td>3000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 96 columns</p>\n",
"</div>"
],
"text/plain": [
" mileage year engineCapacity brand_Abarth brand_Aixam brand_Alfa \\\n",
"0 77000 2015 2000 0 0 0 \n",
"1 186146 2006 1498 0 0 0 \n",
"2 192000 2007 2500 0 0 0 \n",
"3 220000 2003 1997 0 0 0 \n",
"4 248000 2008 1900 0 0 0 \n",
".. ... ... ... ... ... ... \n",
"995 146000 2004 1686 0 0 0 \n",
"996 19323 2015 1598 0 0 0 \n",
"997 27561 2016 1598 0 0 0 \n",
"998 155000 2012 1600 0 0 0 \n",
"999 31438 2015 3000 0 0 0 \n",
"\n",
" brand_Aston brand_Audi brand_Austin brand_BMW ... brand_Uaz \\\n",
"0 0 0 0 0 ... 0 \n",
"1 0 0 0 0 ... 0 \n",
"2 0 0 0 0 ... 0 \n",
"3 0 0 0 0 ... 0 \n",
"4 0 0 0 0 ... 0 \n",
".. ... ... ... ... ... ... \n",
"995 0 0 0 0 ... 0 \n",
"996 0 0 0 0 ... 0 \n",
"997 0 0 0 0 ... 0 \n",
"998 0 0 0 0 ... 0 \n",
"999 0 0 0 0 ... 0 \n",
"\n",
" brand_Vauxhall brand_Volkswagen brand_Volvo brand_Warszawa brand_dla \\\n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 1 0 0 0 \n",
".. ... ... ... ... ... \n",
"995 0 0 0 0 0 \n",
"996 0 0 0 0 0 \n",
"997 0 0 0 0 0 \n",
"998 0 0 0 0 0 \n",
"999 0 0 0 0 0 \n",
"\n",
" brand_star engineType_benzyna engineType_diesel engineType_gaz \n",
"0 0 0 1 0 \n",
"1 0 1 0 0 \n",
"2 0 0 1 0 \n",
"3 0 0 1 0 \n",
"4 0 0 1 0 \n",
".. ... ... ... ... \n",
"995 0 0 1 0 \n",
"996 0 1 0 0 \n",
"997 0 0 1 0 \n",
"998 0 1 0 0 \n",
"999 0 0 1 0 \n",
"\n",
"[1000 rows x 96 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Przygotowanie inputu dla DEV_0\n",
"cars_dev_0_X = pd.get_dummies(cars_dev_0_X)\n",
"columns_to_add = [x for x in input_columns if x not in cars_dev_0_X.columns]\n",
"for column in columns_to_add:\n",
" cars_dev_0_X[column] = 0\n",
"cars_dev_0_X = cars_dev_0_X[input_columns]\n",
"cars_dev_0_X"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"33193.54683638966"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Wynik dla DEV_0\n",
"predictions_dev = model.predict(cars_dev_0_X)\n",
"np.savetxt(\"dev-0/out.tsv\", predictions_dev, fmt='%f')\n",
"sqrt(mean_squared_error(predictions_dev, cars_dev_0_Y))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"cars_test_A_X = pd.read_csv('test-A/in.tsv', sep=\"\\t\", names=header[1:])"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mileage</th>\n",
" <th>year</th>\n",
" <th>engineCapacity</th>\n",
" <th>brand_Abarth</th>\n",
" <th>brand_Aixam</th>\n",
" <th>brand_Alfa</th>\n",
" <th>brand_Aston</th>\n",
" <th>brand_Audi</th>\n",
" <th>brand_Austin</th>\n",
" <th>brand_BMW</th>\n",
" <th>...</th>\n",
" <th>brand_Uaz</th>\n",
" <th>brand_Vauxhall</th>\n",
" <th>brand_Volkswagen</th>\n",
" <th>brand_Volvo</th>\n",
" <th>brand_Warszawa</th>\n",
" <th>brand_dla</th>\n",
" <th>brand_star</th>\n",
" <th>engineType_benzyna</th>\n",
" <th>engineType_diesel</th>\n",
" <th>engineType_gaz</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>203000</td>\n",
" <td>2010</td>\n",
" <td>1500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>39000</td>\n",
" <td>2008</td>\n",
" <td>1000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>190000</td>\n",
" <td>2005</td>\n",
" <td>1600</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>230000</td>\n",
" <td>2001</td>\n",
" <td>1598</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>189000</td>\n",
" <td>2000</td>\n",
" <td>1600</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>465000</td>\n",
" <td>2005</td>\n",
" <td>2500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>89074</td>\n",
" <td>2014</td>\n",
" <td>2000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>21711</td>\n",
" <td>2014</td>\n",
" <td>1329</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>144000</td>\n",
" <td>2014</td>\n",
" <td>1500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>113606</td>\n",
" <td>2000</td>\n",
" <td>4000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 96 columns</p>\n",
"</div>"
],
"text/plain": [
" mileage year engineCapacity brand_Abarth brand_Aixam brand_Alfa \\\n",
"0 203000 2010 1500 0 0 0 \n",
"1 39000 2008 1000 0 0 0 \n",
"2 190000 2005 1600 0 0 0 \n",
"3 230000 2001 1598 0 0 0 \n",
"4 189000 2000 1600 0 0 0 \n",
".. ... ... ... ... ... ... \n",
"995 465000 2005 2500 0 0 0 \n",
"996 89074 2014 2000 0 0 0 \n",
"997 21711 2014 1329 0 0 0 \n",
"998 144000 2014 1500 0 0 0 \n",
"999 113606 2000 4000 0 0 0 \n",
"\n",
" brand_Aston brand_Audi brand_Austin brand_BMW ... brand_Uaz \\\n",
"0 0 0 0 0 ... 0 \n",
"1 0 0 0 0 ... 0 \n",
"2 0 0 0 0 ... 0 \n",
"3 0 0 0 0 ... 0 \n",
"4 0 0 0 1 ... 0 \n",
".. ... ... ... ... ... ... \n",
"995 0 0 0 0 ... 0 \n",
"996 0 0 0 1 ... 0 \n",
"997 0 0 0 0 ... 0 \n",
"998 0 0 0 0 ... 0 \n",
"999 0 0 0 0 ... 0 \n",
"\n",
" brand_Vauxhall brand_Volkswagen brand_Volvo brand_Warszawa brand_dla \\\n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 1 0 0 0 \n",
"4 0 0 0 0 0 \n",
".. ... ... ... ... ... \n",
"995 0 0 0 0 0 \n",
"996 0 0 0 0 0 \n",
"997 0 0 0 0 0 \n",
"998 0 0 0 0 0 \n",
"999 0 0 0 0 0 \n",
"\n",
" brand_star engineType_benzyna engineType_diesel engineType_gaz \n",
"0 0 0 1 0 \n",
"1 0 1 0 0 \n",
"2 0 0 1 0 \n",
"3 0 1 0 0 \n",
"4 0 1 0 0 \n",
".. ... ... ... ... \n",
"995 0 0 1 0 \n",
"996 0 0 1 0 \n",
"997 0 1 0 0 \n",
"998 0 0 1 0 \n",
"999 0 1 0 0 \n",
"\n",
"[1000 rows x 96 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Dostosowanie inputu dla testu\n",
"cars_test_A_X = pd.get_dummies(cars_test_A_X)\n",
"columns_to_add = [x for x in input_columns if x not in cars_test_A_X.columns]\n",
"for column in columns_to_add:\n",
" cars_test_A_X[column] = 0\n",
"cars_test_A_X = cars_test_A_X[input_columns]\n",
"cars_test_A_X"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#Predykcja i zapisywanie wyniku dla testu\n",
"predictions_test = model.predict(cars_test_A_X)\n",
"np.savetxt(\"test-A/out.tsv\", predictions_test, fmt='%f')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

1000
dev-0/out.tsv Normal file

File diff suppressed because it is too large Load Diff

1000
in.tsv Normal file

File diff suppressed because it is too large Load Diff

1000
test-A/out.tsv Normal file

File diff suppressed because it is too large Load Diff