1030 lines
39 KiB
Plaintext
1030 lines
39 KiB
Plaintext
|
{
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 0,
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"provenance": [],
|
||
|
"history_visible": true
|
||
|
},
|
||
|
"kernelspec": {
|
||
|
"name": "python3",
|
||
|
"display_name": "Python 3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"name": "python"
|
||
|
}
|
||
|
},
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "SXcGWK6GBeDz",
|
||
|
"outputId": "ff6683a6-819f-4a8e-d2cc-b5b1871719f8"
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /root/.kaggle/kaggle.json'\n",
|
||
|
"Downloading car-prices-poland.zip to /content\n",
|
||
|
" 0% 0.00/1.64M [00:00<?, ?B/s]\n",
|
||
|
"100% 1.64M/1.64M [00:00<00:00, 120MB/s]\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Download the car-prices-poland dataset archive with the Kaggle CLI (needs Kaggle API credentials).\n",
"!kaggle datasets download -d aleksandrglotov/car-prices-poland"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Unpack the downloaded archive; -o overwrites existing files without prompting.\n",
"!unzip -o car-prices-poland.zip"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "pEPVl2iGEAKg",
|
||
|
"outputId": "4ce5acd9-9bc4-46ed-d993-e434edd70037"
|
||
|
},
|
||
|
"execution_count": 4,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"Archive: car-prices-poland.zip\n",
|
||
|
" inflating: Car_Prices_Poland_Kaggle.csv \n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Count the lines of the extracted CSV (the header line is included in the count).\n",
"!wc -l Car_Prices_Poland_Kaggle.csv"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "-V3yoW74Egpg",
|
||
|
"outputId": "ce016264-dffa-4b1a-c9e4-fc5e8971610e"
|
||
|
},
|
||
|
"execution_count": 5,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"117928 Car_Prices_Poland_Kaggle.csv\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Peek at the first five lines of the raw CSV (header plus four records).\n",
"!head -n 5 Car_Prices_Poland_Kaggle.csv"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "iHZMyIcDEsGY",
|
||
|
"outputId": "33f9297a-91b9-40b8-a2c2-f25e637440f0"
|
||
|
},
|
||
|
"execution_count": 6,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
",mark,model,generation_name,year,mileage,vol_engine,fuel,city,province,price\n",
|
||
|
"0,opel,combo,gen-d-2011,2015,139568,1248,Diesel,Janki,Mazowieckie,35900\n",
|
||
|
"1,opel,combo,gen-d-2011,2018,31991,1499,Diesel,Katowice,Śląskie,78501\n",
|
||
|
"2,opel,combo,gen-d-2011,2015,278437,1598,Diesel,Brzeg,Opolskie,27000\n",
|
||
|
"3,opel,combo,gen-d-2011,2016,47600,1248,Diesel,Korfantów,Opolskie,30800\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# %pip (unlike !pip, which runs in a subshell) installs into the environment of the running kernel.\n",
"%pip install --user pandas seaborn"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "vUqpo9UOFIZ4",
|
||
|
"outputId": "44766fea-2fce-458c-a852-0a0467d34254"
|
||
|
},
|
||
|
"execution_count": 7,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
||
|
"Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.4.4)\n",
|
||
|
"Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n",
|
||
|
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2022.7.1)\n",
|
||
|
"Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.9/dist-packages (from pandas) (1.22.4)\n",
|
||
|
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas) (1.15.0)\n",
|
||
|
"Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
|
||
|
"Requirement already satisfied: seaborn in /usr/local/lib/python3.9/dist-packages (0.12.2)\n",
|
||
|
"Requirement already satisfied: pandas>=0.25 in /usr/local/lib/python3.9/dist-packages (from seaborn) (1.4.4)\n",
|
||
|
"Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in /usr/local/lib/python3.9/dist-packages (from seaborn) (3.7.1)\n",
|
||
|
"Requirement already satisfied: numpy!=1.24.0,>=1.17 in /usr/local/lib/python3.9/dist-packages (from seaborn) (1.22.4)\n",
|
||
|
"Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.0)\n",
|
||
|
"Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n",
|
||
|
"Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4)\n",
|
||
|
"Requirement already satisfied: importlib-resources>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (5.12.0)\n",
|
||
|
"Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.0.9)\n",
|
||
|
"Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n",
|
||
|
"Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.39.0)\n",
|
||
|
"Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.7)\n",
|
||
|
"Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (8.4.0)\n",
|
||
|
"Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas>=0.25->seaborn) (2022.7.1)\n",
|
||
|
"Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.9/dist-packages (from importlib-resources>=3.2.0->matplotlib!=3.6.1,>=3.1->seaborn) (3.15.0)\n",
|
||
|
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.15.0)\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
"\n",
"# The CSV's first, unnamed column is a running row number (0, 1, 2, ...).\n",
"# Use it as the index instead of loading it as a spurious 'Unnamed: 0' feature column.\n",
"cars = pd.read_csv('Car_Prices_Poland_Kaggle.csv', index_col=0)"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "YWOwBUSMFLkI"
|
||
|
},
|
||
|
"execution_count": 8,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Summary statistics for every column, numeric and non-numeric alike (include='all').\n",
"cars.describe(include='all')"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/",
|
||
|
"height": 458
|
||
|
},
|
||
|
"id": "juZ7gGxSFkyn",
|
||
|
"outputId": "4ab59d9c-a016-45af-aef5-1cb76a8543ab"
|
||
|
},
|
||
|
"execution_count": 9,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
" Unnamed: 0 mark model generation_name year \\\n",
|
||
|
"count 117927.000000 117927 117927 87842 117927.000000 \n",
|
||
|
"unique NaN 23 328 364 NaN \n",
|
||
|
"top NaN audi astra gen-8p-2003-2012 NaN \n",
|
||
|
"freq NaN 12031 3331 1567 NaN \n",
|
||
|
"mean 58963.000000 NaN NaN NaN 2012.925259 \n",
|
||
|
"std 34042.736935 NaN NaN NaN 5.690135 \n",
|
||
|
"min 0.000000 NaN NaN NaN 1945.000000 \n",
|
||
|
"25% 29481.500000 NaN NaN NaN 2009.000000 \n",
|
||
|
"50% 58963.000000 NaN NaN NaN 2013.000000 \n",
|
||
|
"75% 88444.500000 NaN NaN NaN 2018.000000 \n",
|
||
|
"max 117926.000000 NaN NaN NaN 2022.000000 \n",
|
||
|
"\n",
|
||
|
" mileage vol_engine fuel city province \\\n",
|
||
|
"count 1.179270e+05 117927.000000 117927 117927 117927 \n",
|
||
|
"unique NaN NaN 6 4427 23 \n",
|
||
|
"top NaN NaN Gasoline Warszawa Mazowieckie \n",
|
||
|
"freq NaN NaN 61597 7972 22219 \n",
|
||
|
"mean 1.409768e+05 1812.057782 NaN NaN NaN \n",
|
||
|
"std 9.236936e+04 643.613438 NaN NaN NaN \n",
|
||
|
"min 0.000000e+00 0.000000 NaN NaN NaN \n",
|
||
|
"25% 6.700000e+04 1461.000000 NaN NaN NaN \n",
|
||
|
"50% 1.462690e+05 1796.000000 NaN NaN NaN \n",
|
||
|
"75% 2.030000e+05 1995.000000 NaN NaN NaN \n",
|
||
|
"max 2.800000e+06 7600.000000 NaN NaN NaN \n",
|
||
|
"\n",
|
||
|
" price \n",
|
||
|
"count 1.179270e+05 \n",
|
||
|
"unique NaN \n",
|
||
|
"top NaN \n",
|
||
|
"freq NaN \n",
|
||
|
"mean 7.029988e+04 \n",
|
||
|
"std 8.482458e+04 \n",
|
||
|
"min 5.000000e+02 \n",
|
||
|
"25% 2.100000e+04 \n",
|
||
|
"50% 4.190000e+04 \n",
|
||
|
"75% 8.360000e+04 \n",
|
||
|
"max 2.399900e+06 "
|
||
|
],
|
||
|
"text/html": [
|
||
|
"\n",
|
||
|
" <div id=\"df-a32a132e-c7e8-420a-8a58-6887af2dd461\">\n",
|
||
|
" <div class=\"colab-df-container\">\n",
|
||
|
" <div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>Unnamed: 0</th>\n",
|
||
|
" <th>mark</th>\n",
|
||
|
" <th>model</th>\n",
|
||
|
" <th>generation_name</th>\n",
|
||
|
" <th>year</th>\n",
|
||
|
" <th>mileage</th>\n",
|
||
|
" <th>vol_engine</th>\n",
|
||
|
" <th>fuel</th>\n",
|
||
|
" <th>city</th>\n",
|
||
|
" <th>province</th>\n",
|
||
|
" <th>price</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>count</th>\n",
|
||
|
" <td>117927.000000</td>\n",
|
||
|
" <td>117927</td>\n",
|
||
|
" <td>117927</td>\n",
|
||
|
" <td>87842</td>\n",
|
||
|
" <td>117927.000000</td>\n",
|
||
|
" <td>1.179270e+05</td>\n",
|
||
|
" <td>117927.000000</td>\n",
|
||
|
" <td>117927</td>\n",
|
||
|
" <td>117927</td>\n",
|
||
|
" <td>117927</td>\n",
|
||
|
" <td>1.179270e+05</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>unique</th>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>23</td>\n",
|
||
|
" <td>328</td>\n",
|
||
|
" <td>364</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>6</td>\n",
|
||
|
" <td>4427</td>\n",
|
||
|
" <td>23</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>top</th>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>audi</td>\n",
|
||
|
" <td>astra</td>\n",
|
||
|
" <td>gen-8p-2003-2012</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>Gasoline</td>\n",
|
||
|
" <td>Warszawa</td>\n",
|
||
|
" <td>Mazowieckie</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>freq</th>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>12031</td>\n",
|
||
|
" <td>3331</td>\n",
|
||
|
" <td>1567</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>61597</td>\n",
|
||
|
" <td>7972</td>\n",
|
||
|
" <td>22219</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>mean</th>\n",
|
||
|
" <td>58963.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>2012.925259</td>\n",
|
||
|
" <td>1.409768e+05</td>\n",
|
||
|
" <td>1812.057782</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>7.029988e+04</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>std</th>\n",
|
||
|
" <td>34042.736935</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>5.690135</td>\n",
|
||
|
" <td>9.236936e+04</td>\n",
|
||
|
" <td>643.613438</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>8.482458e+04</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>min</th>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>1945.000000</td>\n",
|
||
|
" <td>0.000000e+00</td>\n",
|
||
|
" <td>0.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>5.000000e+02</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>25%</th>\n",
|
||
|
" <td>29481.500000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>2009.000000</td>\n",
|
||
|
" <td>6.700000e+04</td>\n",
|
||
|
" <td>1461.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>2.100000e+04</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>50%</th>\n",
|
||
|
" <td>58963.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>2013.000000</td>\n",
|
||
|
" <td>1.462690e+05</td>\n",
|
||
|
" <td>1796.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>4.190000e+04</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>75%</th>\n",
|
||
|
" <td>88444.500000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>2018.000000</td>\n",
|
||
|
" <td>2.030000e+05</td>\n",
|
||
|
" <td>1995.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>8.360000e+04</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>max</th>\n",
|
||
|
" <td>117926.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>2022.000000</td>\n",
|
||
|
" <td>2.800000e+06</td>\n",
|
||
|
" <td>7600.000000</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>2.399900e+06</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>\n",
|
||
|
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-a32a132e-c7e8-420a-8a58-6887af2dd461')\"\n",
|
||
|
" title=\"Convert this dataframe to an interactive table.\"\n",
|
||
|
" style=\"display:none;\">\n",
|
||
|
" \n",
|
||
|
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
|
||
|
" width=\"24px\">\n",
|
||
|
" <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
|
||
|
" <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
|
||
|
" </svg>\n",
|
||
|
" </button>\n",
|
||
|
" \n",
|
||
|
" <style>\n",
|
||
|
" .colab-df-container {\n",
|
||
|
" display:flex;\n",
|
||
|
" flex-wrap:wrap;\n",
|
||
|
" gap: 12px;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .colab-df-convert {\n",
|
||
|
" background-color: #E8F0FE;\n",
|
||
|
" border: none;\n",
|
||
|
" border-radius: 50%;\n",
|
||
|
" cursor: pointer;\n",
|
||
|
" display: none;\n",
|
||
|
" fill: #1967D2;\n",
|
||
|
" height: 32px;\n",
|
||
|
" padding: 0 0 0 0;\n",
|
||
|
" width: 32px;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .colab-df-convert:hover {\n",
|
||
|
" background-color: #E2EBFA;\n",
|
||
|
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
|
||
|
" fill: #174EA6;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" [theme=dark] .colab-df-convert {\n",
|
||
|
" background-color: #3B4455;\n",
|
||
|
" fill: #D2E3FC;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" [theme=dark] .colab-df-convert:hover {\n",
|
||
|
" background-color: #434B5C;\n",
|
||
|
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
|
||
|
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
|
||
|
" fill: #FFFFFF;\n",
|
||
|
" }\n",
|
||
|
" </style>\n",
|
||
|
"\n",
|
||
|
" <script>\n",
|
||
|
" const buttonEl =\n",
|
||
|
" document.querySelector('#df-a32a132e-c7e8-420a-8a58-6887af2dd461 button.colab-df-convert');\n",
|
||
|
" buttonEl.style.display =\n",
|
||
|
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
|
||
|
"\n",
|
||
|
" async function convertToInteractive(key) {\n",
|
||
|
" const element = document.querySelector('#df-a32a132e-c7e8-420a-8a58-6887af2dd461');\n",
|
||
|
" const dataTable =\n",
|
||
|
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
|
||
|
" [key], {});\n",
|
||
|
" if (!dataTable) return;\n",
|
||
|
"\n",
|
||
|
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
|
||
|
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
|
||
|
" + ' to learn more about interactive tables.';\n",
|
||
|
" element.innerHTML = '';\n",
|
||
|
" dataTable['output_type'] = 'display_data';\n",
|
||
|
" await google.colab.output.renderOutput(dataTable, element);\n",
|
||
|
" const docLink = document.createElement('div');\n",
|
||
|
" docLink.innerHTML = docLinkHtml;\n",
|
||
|
" element.appendChild(docLink);\n",
|
||
|
" }\n",
|
||
|
" </script>\n",
|
||
|
" </div>\n",
|
||
|
" </div>\n",
|
||
|
" "
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 9
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Row 73436 holds erroneous data, so it is removed.\n",
"# errors='ignore' keeps this cell re-runnable: without it a second run raises\n",
"# KeyError because the label has already been dropped from `cars`.\n",
"cars = cars.drop(index=73436, errors='ignore')"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "L1nKE2QCKDrw"
|
||
|
},
|
||
|
"execution_count": 10,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"def normalize(df, feature_name):\n",
"    \"\"\"Return a copy of ``df`` with one column min-max scaled to [0, 1].\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df : pandas.DataFrame\n",
"        Source frame; it is left unmodified.\n",
"    feature_name : str\n",
"        Label of the numeric column to rescale.\n",
"\n",
"    Returns\n",
"    -------\n",
"    pandas.DataFrame\n",
"        Copy of ``df`` whose ``feature_name`` column holds\n",
"        ``(x - min) / (max - min)``. A constant column (max == min) maps to\n",
"        0 rather than the NaN that a 0/0 division would produce.\n",
"    \"\"\"\n",
"    result = df.copy()\n",
"    column = df[feature_name]\n",
"    min_value = column.min()\n",
"    value_range = column.max() - min_value\n",
"    if value_range == 0:\n",
"        # Constant column: divide by 1 so every value becomes 0 instead of NaN.\n",
"        value_range = 1\n",
"    result[feature_name] = (column - min_value) / value_range\n",
"    return result"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"id": "_G1CH_2QQPAF"
|
||
|
},
|
||
|
"execution_count": 11,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [],
|
||
|
"metadata": {
|
||
|
"id": "apINI6GKPeda"
|
||
|
}
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"cars_normalized = normalize(cars, 'vol_engine')\n",
"# A bare last expression gives the rich, truncated DataFrame display (with its shape)\n",
"# instead of the plain-text dump produced by print().\n",
"cars_normalized"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "YPLfex7yPH8v",
|
||
|
"outputId": "9b44de1d-7776-40f0-b1a9-eac0d08b52b1"
|
||
|
},
|
||
|
"execution_count": 12,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
" Unnamed: 0 mark model generation_name year mileage \\\n",
|
||
|
"0 0 opel combo gen-d-2011 2015 139568 \n",
|
||
|
"1 1 opel combo gen-d-2011 2018 31991 \n",
|
||
|
"2 2 opel combo gen-d-2011 2015 278437 \n",
|
||
|
"3 3 opel combo gen-d-2011 2016 47600 \n",
|
||
|
"4 4 opel combo gen-d-2011 2014 103000 \n",
|
||
|
"... ... ... ... ... ... ... \n",
|
||
|
"117922 117922 volvo xc-90 gen-ii-2014-xc-90 2020 40000 \n",
|
||
|
"117923 117923 volvo xc-90 gen-ii-2014-xc-90 2017 51000 \n",
|
||
|
"117924 117924 volvo xc-90 gen-ii-2014-xc-90 2016 83500 \n",
|
||
|
"117925 117925 volvo xc-90 gen-ii-2014-xc-90 2017 174000 \n",
|
||
|
"117926 117926 volvo xc-90 gen-ii-2014-xc-90 2016 189020 \n",
|
||
|
"\n",
|
||
|
" vol_engine fuel city province price \n",
|
||
|
"0 0.164211 Diesel Janki Mazowieckie 35900 \n",
|
||
|
"1 0.197237 Diesel Katowice Śląskie 78501 \n",
|
||
|
"2 0.210263 Diesel Brzeg Opolskie 27000 \n",
|
||
|
"3 0.164211 Diesel Korfantów Opolskie 30800 \n",
|
||
|
"4 0.184211 CNG Tarnowskie Góry Śląskie 35900 \n",
|
||
|
"... ... ... ... ... ... \n",
|
||
|
"117922 0.259079 Hybrid Katowice Śląskie 222790 \n",
|
||
|
"117923 0.259079 Diesel Chechło Pierwsze Łódzkie 229900 \n",
|
||
|
"117924 0.259079 Gasoline Pruszcz Gdański Pomorskie 135000 \n",
|
||
|
"117925 0.259079 Diesel Kalisz Wielkopolskie 154500 \n",
|
||
|
"117926 0.259079 Gasoline Sionna Mazowieckie 130000 \n",
|
||
|
"\n",
|
||
|
"[117926 rows x 11 columns]\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"import sklearn\n",
"import sklearn.model_selection\n",
"\n",
"# Hold out 20% of the rows. A fraction stays correct if the row count changes;\n",
"# for the current 117926 rows it selects the same 23586 rows as the former hard-coded count.\n",
"cars_train, cars_test = sklearn.model_selection.train_test_split(cars_normalized, test_size=0.2, random_state=1)\n",
"cars_train[\"province\"].value_counts()"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "PZwsQwgeSoHb",
|
||
|
"outputId": "8972c3e2-344b-482a-addf-23a799fbb3fb"
|
||
|
},
|
||
|
"execution_count": 14,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"Mazowieckie 17750\n",
|
||
|
"Śląskie 13441\n",
|
||
|
"Wielkopolskie 11162\n",
|
||
|
"Małopolskie 7796\n",
|
||
|
"Dolnośląskie 7092\n",
|
||
|
"Łódzkie 6303\n",
|
||
|
"Pomorskie 6094\n",
|
||
|
"Kujawsko-pomorskie 4256\n",
|
||
|
"Lubelskie 3775\n",
|
||
|
"Zachodniopomorskie 3165\n",
|
||
|
"Podkarpackie 2826\n",
|
||
|
"Świętokrzyskie 2657\n",
|
||
|
"Warmińsko-mazurskie 2375\n",
|
||
|
"Lubuskie 2220\n",
|
||
|
"Podlaskie 1716\n",
|
||
|
"Opolskie 1679\n",
|
||
|
"Moravian-Silesian Region 27\n",
|
||
|
"Wiedeń 2\n",
|
||
|
"Berlin 2\n",
|
||
|
"Trenczyn 1\n",
|
||
|
"Niedersachsen 1\n",
|
||
|
"Name: province, dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 14
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Split the hold-out (every row of cars_normalized that is not in cars_train) evenly into dev and test.\n",
"# The hold-out is rebuilt here rather than read from cars_test, so re-running this cell\n",
"# no longer splits an already-halved cars_test again.\n",
"# Note: the hold-out is in original row order, so dev/test membership differs from a split\n",
"# of the shuffled cars_test; the sizes are unchanged.\n",
"cars_holdout = cars_normalized.drop(index=cars_train.index)\n",
"cars_dev, cars_test = sklearn.model_selection.train_test_split(cars_holdout, test_size=0.5, random_state=1)\n",
"cars_dev[\"province\"].value_counts()"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "-ec5RLaXTgWK",
|
||
|
"outputId": "227a54eb-6c8f-4faf-c38b-cd3147202e92"
|
||
|
},
|
||
|
"execution_count": 15,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"Mazowieckie 2261\n",
|
||
|
"Śląskie 1666\n",
|
||
|
"Wielkopolskie 1418\n",
|
||
|
"Małopolskie 948\n",
|
||
|
"Dolnośląskie 867\n",
|
||
|
"Łódzkie 775\n",
|
||
|
"Pomorskie 766\n",
|
||
|
"Kujawsko-pomorskie 532\n",
|
||
|
"Lubelskie 504\n",
|
||
|
"Zachodniopomorskie 396\n",
|
||
|
"Podkarpackie 365\n",
|
||
|
"Świętokrzyskie 353\n",
|
||
|
"Warmińsko-mazurskie 282\n",
|
||
|
"Lubuskie 263\n",
|
||
|
"Opolskie 199\n",
|
||
|
"Podlaskie 192\n",
|
||
|
"Moravian-Silesian Region 4\n",
|
||
|
"Nordrhein-Westfalen 1\n",
|
||
|
"Berlin 1\n",
|
||
|
"Name: province, dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 15
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Number of rows per province in the test split\n",
|
||
|
"cars_test[\"province\"].value_counts()"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "2VwezzlzUvZd",
|
||
|
"outputId": "5dece8a2-2d6b-4a25-fda5-be7c85b4765d"
|
||
|
},
|
||
|
"execution_count": 16,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"Mazowieckie 2208\n",
|
||
|
"Śląskie 1599\n",
|
||
|
"Wielkopolskie 1436\n",
|
||
|
"Małopolskie 1012\n",
|
||
|
"Dolnośląskie 879\n",
|
||
|
"Łódzkie 806\n",
|
||
|
"Pomorskie 745\n",
|
||
|
"Kujawsko-pomorskie 583\n",
|
||
|
"Lubelskie 461\n",
|
||
|
"Zachodniopomorskie 402\n",
|
||
|
"Podkarpackie 362\n",
|
||
|
"Świętokrzyskie 327\n",
|
||
|
"Warmińsko-mazurskie 299\n",
|
||
|
"Lubuskie 260\n",
|
||
|
"Podlaskie 215\n",
|
||
|
"Opolskie 195\n",
|
||
|
"Moravian-Silesian Region 4\n",
|
||
|
"Name: province, dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 16
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Number of values (rows x columns) in the full, train, dev and test sets, in that order\n",
"for subset in (cars_normalized, cars_train, cars_dev, cars_test):\n",
"    print(subset.size)"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "sprjCCXTV8W0",
|
||
|
"outputId": "3b12b8c4-279f-4751-f801-e97d2c81c01b"
|
||
|
},
|
||
|
"execution_count": 17,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"1297186\n",
|
||
|
"1037740\n",
|
||
|
"129723\n",
|
||
|
"129723\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Mean price in the full, train, dev and test sets, in that order\n",
"for subset in (cars_normalized, cars_train, cars_dev, cars_test):\n",
"    print(subset['price'].mean())"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "TjvBBTAsXbUK",
|
||
|
"outputId": "644543bb-acb6-4bda-de01-ab92514b7de8"
|
||
|
},
|
||
|
"execution_count": 18,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"70299.94754337466\n",
|
||
|
"70432.62519609921\n",
|
||
|
"69244.09963537692\n",
|
||
|
"70294.41923174764\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Lowest vehicle price in the full, train, dev and test sets, in that order\n",
"for subset in (cars_normalized, cars_train, cars_dev, cars_test):\n",
"    print(subset['price'].min())"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "GJI2qf-1YLbp",
|
||
|
"outputId": "20aec129-96c9-4adb-f3cb-25db4b2dc207"
|
||
|
},
|
||
|
"execution_count": 19,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"500\n",
|
||
|
"500\n",
|
||
|
"1250\n",
|
||
|
"900\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Highest vehicle price in the full, train, dev and test sets, in that order\n",
"for subset in (cars_normalized, cars_train, cars_dev, cars_test):\n",
"    print(subset['price'].max())"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "Ve8Cvu7IYx-E",
|
||
|
"outputId": "ec0b0167-74ad-4118-b1c8-734c80cd9d79"
|
||
|
},
|
||
|
"execution_count": 20,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"2399900\n",
|
||
|
"2399900\n",
|
||
|
"1368341\n",
|
||
|
"1000000\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Standard deviation of the price in the full, train, dev and test sets, in that order\n",
"for subset in (cars_normalized, cars_train, cars_dev, cars_test):\n",
"    print(subset['price'].std())"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "tGDytphgY7oB",
|
||
|
"outputId": "caf5152a-5c5d-42ca-95d5-8aa1afd8d46f"
|
||
|
},
|
||
|
"execution_count": 21,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"84824.93470827927\n",
|
||
|
"85120.16823252657\n",
|
||
|
"82128.74927832028\n",
|
||
|
"85111.52408658911\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [],
|
||
|
"metadata": {
|
||
|
"id": "9JafBXorXIXy"
|
||
|
},
|
||
|
"execution_count": null,
|
||
|
"outputs": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Median vehicle price in the full, train, dev and test sets, in that order\n",
"for subset in (cars_normalized, cars_train, cars_dev, cars_test):\n",
"    print(subset['price'].median())"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "pdmR9mKpU78C",
|
||
|
"outputId": "e0fbd8a5-39b4-441f-8b64-1aaa210ba36c"
|
||
|
},
|
||
|
"execution_count": 22,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "stream",
|
||
|
"name": "stdout",
|
||
|
"text": [
|
||
|
"41900.0\n",
|
||
|
"41900.0\n",
|
||
|
"41901.0\n",
|
||
|
"40900.0\n"
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Number of rows per province in the full normalized data set\n",
|
||
|
"cars_normalized[\"province\"].value_counts()"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "MXSSLTdR-7xP",
|
||
|
"outputId": "7facce01-e2e8-415b-9384-74253f1717d1"
|
||
|
},
|
||
|
"execution_count": 26,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"Mazowieckie 22219\n",
|
||
|
"Śląskie 16706\n",
|
||
|
"Wielkopolskie 14016\n",
|
||
|
"Małopolskie 9756\n",
|
||
|
"Dolnośląskie 8838\n",
|
||
|
"Łódzkie 7884\n",
|
||
|
"Pomorskie 7605\n",
|
||
|
"Kujawsko-pomorskie 5371\n",
|
||
|
"Lubelskie 4740\n",
|
||
|
"Zachodniopomorskie 3963\n",
|
||
|
"Podkarpackie 3553\n",
|
||
|
"Świętokrzyskie 3337\n",
|
||
|
"Warmińsko-mazurskie 2956\n",
|
||
|
"Lubuskie 2743\n",
|
||
|
"Podlaskie 2123\n",
|
||
|
"Opolskie 2073\n",
|
||
|
"Moravian-Silesian Region 35\n",
|
||
|
"Berlin 3\n",
|
||
|
"Wiedeń 2\n",
|
||
|
"Niedersachsen 1\n",
|
||
|
"Trenczyn 1\n",
|
||
|
"Nordrhein-Westfalen 1\n",
|
||
|
"Name: province, dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 26
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"source": [
|
||
|
"# Number of rows per make (`mark` column) in the full normalized data set\n",
|
||
|
"cars_normalized[\"mark\"].value_counts()"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"colab": {
|
||
|
"base_uri": "https://localhost:8080/"
|
||
|
},
|
||
|
"id": "XGlwLMbE_Mnf",
|
||
|
"outputId": "fd743df6-2043-45ff-bea1-b19f03869eb8"
|
||
|
},
|
||
|
"execution_count": 27,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"output_type": "execute_result",
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"audi 12031\n",
|
||
|
"opel 11914\n",
|
||
|
"bmw 11070\n",
|
||
|
"volkswagen 10848\n",
|
||
|
"ford 9664\n",
|
||
|
"mercedes-benz 7136\n",
|
||
|
"renault 6976\n",
|
||
|
"skoda 5888\n",
|
||
|
"toyota 5119\n",
|
||
|
"peugeot 5056\n",
|
||
|
"volvo 4384\n",
|
||
|
"hyundai 4032\n",
|
||
|
"kia 3744\n",
|
||
|
"nissan 3072\n",
|
||
|
"fiat 2880\n",
|
||
|
"mazda 2848\n",
|
||
|
"seat 2848\n",
|
||
|
"citroen 2720\n",
|
||
|
"honda 2176\n",
|
||
|
"mitsubishi 1120\n",
|
||
|
"mini 1088\n",
|
||
|
"alfa-romeo 704\n",
|
||
|
"chevrolet 608\n",
|
||
|
"Name: mark, dtype: int64"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"execution_count": 27
|
||
|
}
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"source": [],
|
||
|
"metadata": {
|
||
|
"id": "2a30BavmDAzQ"
|
||
|
}
|
||
|
}
|
||
|
]
|
||
|
}
|