Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

5 changed files with 0 additions and 3937 deletions

2
.gitignore vendored
View File

@ -1,5 +1,3 @@
*~ *~
*.swp *.swp
*.pyc *.pyc
geval
.ipynb_checkpoints/*

View File

@ -1,935 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: scikit-learn==0.24.2 in c:\\users\\ania\\appdata\\roaming\\python\\python38\\site-packages (0.24.2)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (2.1.0)\n",
"Requirement already satisfied: scipy>=0.19.1 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (1.5.2)\n",
"Requirement already satisfied: joblib>=0.11 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (0.17.0)\n",
"Requirement already satisfied: numpy>=1.13.3 in c:\\programdata\\anaconda3\\lib\\site-packages (from scikit-learn==0.24.2) (1.19.2)\n"
]
}
],
"source": [
"# Install the pinned scikit-learn release into the environment of the running kernel.\n",
"# %pip (unlike !pip) always targets the interpreter that backs this notebook.\n",
"%pip install scikit-learn==0.24.2 --user"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Standard library\n",
"from math import sqrt\n",
"\n",
"# Third-party\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.metrics import mean_squared_error\n",
"\n",
"# Print NumPy floats in fixed-point notation.\n",
"np.set_printoptions(formatter={'float_kind': '{:f}'.format})"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Column names are stored tab-separated in the 'names' file.\n",
"with open('names') as names_file:\n",
"    header = names_file.read().replace('\\n', '').split('\\t')\n",
"\n",
"# Read the training set, labelling its columns with the names loaded above.\n",
"cars_train = pd.read_csv('train/train.tsv', sep=\"\\t\", names=header)\n",
"\n",
"# Select the model features and one-hot encode the non-numeric ones.\n",
"cars_train_X = pd.get_dummies(\n",
"    cars_train[[\"mileage\", \"year\", \"brand\", \"engineType\", \"engineCapacity\"]]\n",
")\n",
"cars_train_Y = cars_train[\"price\"]\n",
"\n",
"# Remember the encoded column layout so other inputs can be aligned to it.\n",
"input_columns = cars_train_X.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"30118.8791272898"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Train the model and report the RMSE on the training set.\n",
"# NOTE(review): positive=True constrains every coefficient to be non-negative, so a\n",
"# feature such as mileage cannot lower the predicted price - confirm this is intended.\n",
"model = LinearRegression(positive=True)\n",
"model.fit(cars_train_X, cars_train_Y)\n",
"predictions = model.predict(cars_train_X)\n",
"# scikit-learn metrics take (y_true, y_pred), in that order.\n",
"sqrt(mean_squared_error(cars_train_Y, predictions))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Load the dev-0 features. header[1:] skips the first column name\n",
"# (presumably the target, which in.tsv does not contain - verify against the data).\n",
"cars_dev_0_X = pd.read_csv(\n",
"    'dev-0/in.tsv',\n",
"    sep=\"\\t\",\n",
"    names=header[1:],\n",
")\n",
"\n",
"# Load the expected values and flatten them into a 1-D array.\n",
"cars_dev_0_Y = (\n",
"    pd.read_csv('dev-0/expected.tsv', sep=\"\\t\", header=None)\n",
"    .to_numpy()\n",
"    .flatten('F')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mileage</th>\n",
" <th>year</th>\n",
" <th>engineCapacity</th>\n",
" <th>brand_Abarth</th>\n",
" <th>brand_Aixam</th>\n",
" <th>brand_Alfa</th>\n",
" <th>brand_Aston</th>\n",
" <th>brand_Audi</th>\n",
" <th>brand_Austin</th>\n",
" <th>brand_BMW</th>\n",
" <th>...</th>\n",
" <th>brand_Uaz</th>\n",
" <th>brand_Vauxhall</th>\n",
" <th>brand_Volkswagen</th>\n",
" <th>brand_Volvo</th>\n",
" <th>brand_Warszawa</th>\n",
" <th>brand_dla</th>\n",
" <th>brand_star</th>\n",
" <th>engineType_benzyna</th>\n",
" <th>engineType_diesel</th>\n",
" <th>engineType_gaz</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>77000</td>\n",
" <td>2015</td>\n",
" <td>2000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>186146</td>\n",
" <td>2006</td>\n",
" <td>1498</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>192000</td>\n",
" <td>2007</td>\n",
" <td>2500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>220000</td>\n",
" <td>2003</td>\n",
" <td>1997</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>248000</td>\n",
" <td>2008</td>\n",
" <td>1900</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>146000</td>\n",
" <td>2004</td>\n",
" <td>1686</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>19323</td>\n",
" <td>2015</td>\n",
" <td>1598</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>27561</td>\n",
" <td>2016</td>\n",
" <td>1598</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>155000</td>\n",
" <td>2012</td>\n",
" <td>1600</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>31438</td>\n",
" <td>2015</td>\n",
" <td>3000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 96 columns</p>\n",
"</div>"
],
"text/plain": [
" mileage year engineCapacity brand_Abarth brand_Aixam brand_Alfa \\\n",
"0 77000 2015 2000 0 0 0 \n",
"1 186146 2006 1498 0 0 0 \n",
"2 192000 2007 2500 0 0 0 \n",
"3 220000 2003 1997 0 0 0 \n",
"4 248000 2008 1900 0 0 0 \n",
".. ... ... ... ... ... ... \n",
"995 146000 2004 1686 0 0 0 \n",
"996 19323 2015 1598 0 0 0 \n",
"997 27561 2016 1598 0 0 0 \n",
"998 155000 2012 1600 0 0 0 \n",
"999 31438 2015 3000 0 0 0 \n",
"\n",
" brand_Aston brand_Audi brand_Austin brand_BMW ... brand_Uaz \\\n",
"0 0 0 0 0 ... 0 \n",
"1 0 0 0 0 ... 0 \n",
"2 0 0 0 0 ... 0 \n",
"3 0 0 0 0 ... 0 \n",
"4 0 0 0 0 ... 0 \n",
".. ... ... ... ... ... ... \n",
"995 0 0 0 0 ... 0 \n",
"996 0 0 0 0 ... 0 \n",
"997 0 0 0 0 ... 0 \n",
"998 0 0 0 0 ... 0 \n",
"999 0 0 0 0 ... 0 \n",
"\n",
" brand_Vauxhall brand_Volkswagen brand_Volvo brand_Warszawa brand_dla \\\n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 1 0 0 0 \n",
".. ... ... ... ... ... \n",
"995 0 0 0 0 0 \n",
"996 0 0 0 0 0 \n",
"997 0 0 0 0 0 \n",
"998 0 0 0 0 0 \n",
"999 0 0 0 0 0 \n",
"\n",
" brand_star engineType_benzyna engineType_diesel engineType_gaz \n",
"0 0 0 1 0 \n",
"1 0 1 0 0 \n",
"2 0 0 1 0 \n",
"3 0 0 1 0 \n",
"4 0 0 1 0 \n",
".. ... ... ... ... \n",
"995 0 0 1 0 \n",
"996 0 1 0 0 \n",
"997 0 0 1 0 \n",
"998 0 1 0 0 \n",
"999 0 0 1 0 \n",
"\n",
"[1000 rows x 96 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Prepare the DEV_0 input: one-hot encode it, then align it to the training layout.\n",
"# reindex keeps exactly the training columns in training order, adds any column that is\n",
"# missing here filled with 0, and drops columns the model was not trained on.\n",
"cars_dev_0_X = pd.get_dummies(cars_dev_0_X).reindex(columns=input_columns, fill_value=0)\n",
"cars_dev_0_X"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"33193.54683638966"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predict on DEV_0, save the predictions and report the RMSE.\n",
"predictions_dev = model.predict(cars_dev_0_X)\n",
"np.savetxt(\"dev-0/out.tsv\", predictions_dev, fmt='%f')\n",
"# scikit-learn metrics take (y_true, y_pred), in that order.\n",
"sqrt(mean_squared_error(cars_dev_0_Y, predictions_dev))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Load the test-A features, using the same column names as the dev-0 input.\n",
"cars_test_A_X = pd.read_csv(\n",
"    'test-A/in.tsv',\n",
"    sep=\"\\t\",\n",
"    names=header[1:],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mileage</th>\n",
" <th>year</th>\n",
" <th>engineCapacity</th>\n",
" <th>brand_Abarth</th>\n",
" <th>brand_Aixam</th>\n",
" <th>brand_Alfa</th>\n",
" <th>brand_Aston</th>\n",
" <th>brand_Audi</th>\n",
" <th>brand_Austin</th>\n",
" <th>brand_BMW</th>\n",
" <th>...</th>\n",
" <th>brand_Uaz</th>\n",
" <th>brand_Vauxhall</th>\n",
" <th>brand_Volkswagen</th>\n",
" <th>brand_Volvo</th>\n",
" <th>brand_Warszawa</th>\n",
" <th>brand_dla</th>\n",
" <th>brand_star</th>\n",
" <th>engineType_benzyna</th>\n",
" <th>engineType_diesel</th>\n",
" <th>engineType_gaz</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>203000</td>\n",
" <td>2010</td>\n",
" <td>1500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>39000</td>\n",
" <td>2008</td>\n",
" <td>1000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>190000</td>\n",
" <td>2005</td>\n",
" <td>1600</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>230000</td>\n",
" <td>2001</td>\n",
" <td>1598</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>189000</td>\n",
" <td>2000</td>\n",
" <td>1600</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>465000</td>\n",
" <td>2005</td>\n",
" <td>2500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>996</th>\n",
" <td>89074</td>\n",
" <td>2014</td>\n",
" <td>2000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>997</th>\n",
" <td>21711</td>\n",
" <td>2014</td>\n",
" <td>1329</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>998</th>\n",
" <td>144000</td>\n",
" <td>2014</td>\n",
" <td>1500</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>999</th>\n",
" <td>113606</td>\n",
" <td>2000</td>\n",
" <td>4000</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1000 rows × 96 columns</p>\n",
"</div>"
],
"text/plain": [
" mileage year engineCapacity brand_Abarth brand_Aixam brand_Alfa \\\n",
"0 203000 2010 1500 0 0 0 \n",
"1 39000 2008 1000 0 0 0 \n",
"2 190000 2005 1600 0 0 0 \n",
"3 230000 2001 1598 0 0 0 \n",
"4 189000 2000 1600 0 0 0 \n",
".. ... ... ... ... ... ... \n",
"995 465000 2005 2500 0 0 0 \n",
"996 89074 2014 2000 0 0 0 \n",
"997 21711 2014 1329 0 0 0 \n",
"998 144000 2014 1500 0 0 0 \n",
"999 113606 2000 4000 0 0 0 \n",
"\n",
" brand_Aston brand_Audi brand_Austin brand_BMW ... brand_Uaz \\\n",
"0 0 0 0 0 ... 0 \n",
"1 0 0 0 0 ... 0 \n",
"2 0 0 0 0 ... 0 \n",
"3 0 0 0 0 ... 0 \n",
"4 0 0 0 1 ... 0 \n",
".. ... ... ... ... ... ... \n",
"995 0 0 0 0 ... 0 \n",
"996 0 0 0 1 ... 0 \n",
"997 0 0 0 0 ... 0 \n",
"998 0 0 0 0 ... 0 \n",
"999 0 0 0 0 ... 0 \n",
"\n",
" brand_Vauxhall brand_Volkswagen brand_Volvo brand_Warszawa brand_dla \\\n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 1 0 0 0 \n",
"4 0 0 0 0 0 \n",
".. ... ... ... ... ... \n",
"995 0 0 0 0 0 \n",
"996 0 0 0 0 0 \n",
"997 0 0 0 0 0 \n",
"998 0 0 0 0 0 \n",
"999 0 0 0 0 0 \n",
"\n",
" brand_star engineType_benzyna engineType_diesel engineType_gaz \n",
"0 0 0 1 0 \n",
"1 0 1 0 0 \n",
"2 0 0 1 0 \n",
"3 0 1 0 0 \n",
"4 0 1 0 0 \n",
".. ... ... ... ... \n",
"995 0 0 1 0 \n",
"996 0 0 1 0 \n",
"997 0 1 0 0 \n",
"998 0 0 1 0 \n",
"999 0 1 0 0 \n",
"\n",
"[1000 rows x 96 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Prepare the test input: one-hot encode it, then align it to the training layout.\n",
"# reindex keeps exactly the training columns in training order, adds any column that is\n",
"# missing here filled with 0, and drops columns the model was not trained on.\n",
"cars_test_A_X = pd.get_dummies(cars_test_A_X).reindex(columns=input_columns, fill_value=0)\n",
"cars_test_A_X"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Predict on the test set and save the predictions, one value per line.\n",
"predictions_test = model.predict(cars_test_A_X)\n",
"np.savetxt(fname=\"test-A/out.tsv\", X=predictions_test, fmt='%f')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

File diff suppressed because it is too large Load Diff

1000
in.tsv

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff