{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "history_visible": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SXcGWK6GBeDz", "outputId": "ff6683a6-819f-4a8e-d2cc-b5b1871719f8" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /root/.kaggle/kaggle.json'\n", "Downloading car-prices-poland.zip to /content\n", " 0% 0.00/1.64M [00:00, ?B/s]\n", "100% 1.64M/1.64M [00:00<00:00, 120MB/s]\n" ] } ], "source": [ "!kaggle datasets download -d aleksandrglotov/car-prices-poland" ] }, { "cell_type": "code", "source": [ "!unzip -o car-prices-poland.zip" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pEPVl2iGEAKg", "outputId": "4ce5acd9-9bc4-46ed-d993-e434edd70037" }, "execution_count": 4, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Archive: car-prices-poland.zip\n", " inflating: Car_Prices_Poland_Kaggle.csv \n" ] } ] }, { "cell_type": "code", "source": [ "!wc -l Car_Prices_Poland_Kaggle.csv" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-V3yoW74Egpg", "outputId": "ce016264-dffa-4b1a-c9e4-fc5e8971610e" }, "execution_count": 5, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "117928 Car_Prices_Poland_Kaggle.csv\n" ] } ] }, { "cell_type": "code", "source": [ "!head -n 5 Car_Prices_Poland_Kaggle.csv" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "iHZMyIcDEsGY", "outputId": "33f9297a-91b9-40b8-a2c2-f25e637440f0" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ ",mark,model,generation_name,year,mileage,vol_engine,fuel,city,province,price\n", "0,opel,combo,gen-d-2011,2015,139568,1248,Diesel,Janki,Mazowieckie,35900\n", "1,opel,combo,gen-d-2011,2018,31991,1499,Diesel,Katowice,Śląskie,78501\n", "2,opel,combo,gen-d-2011,2015,278437,1598,Diesel,Brzeg,Opolskie,27000\n", "3,opel,combo,gen-d-2011,2016,47600,1248,Diesel,Korfantów,Opolskie,30800\n" ] } ] }, { "cell_type": "code", "source": [ "!pip install --user pandas\n", "!pip install --user seaborn" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vUqpo9UOFIZ4", "outputId": "44766fea-2fce-458c-a852-0a0467d34254" }, "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.9/dist-packages (1.4.4)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas) (2022.7.1)\n", "Requirement already satisfied: numpy>=1.18.5 in /usr/local/lib/python3.9/dist-packages (from pandas) (1.22.4)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.8.1->pandas) (1.15.0)\n", "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: seaborn in /usr/local/lib/python3.9/dist-packages (0.12.2)\n", "Requirement already satisfied: pandas>=0.25 in /usr/local/lib/python3.9/dist-packages (from seaborn) (1.4.4)\n", "Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in /usr/local/lib/python3.9/dist-packages (from seaborn) (3.7.1)\n", "Requirement already satisfied: numpy!=1.24.0,>=1.17 in /usr/local/lib/python3.9/dist-packages (from seaborn) (1.22.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.0)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4)\n", "Requirement already satisfied: importlib-resources>=3.2.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (5.12.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.0.9)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.39.0)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.7)\n", "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.9/dist-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (8.4.0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.9/dist-packages (from pandas>=0.25->seaborn) (2022.7.1)\n", "Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.9/dist-packages (from importlib-resources>=3.2.0->matplotlib!=3.6.1,>=3.1->seaborn) (3.15.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.9/dist-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.15.0)\n" ] } ] }, { "cell_type": "code", "source": [ "import pandas as pd\n", "cars = pd.read_csv('Car_Prices_Poland_Kaggle.csv')" ], "metadata": { "id": "YWOwBUSMFLkI" }, "execution_count": 8, "outputs": [] }, { "cell_type": "code", "source": [ "cars.describe(include='all')" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 458 }, "id": "juZ7gGxSFkyn", "outputId": "4ab59d9c-a016-45af-aef5-1cb76a8543ab" }, "execution_count": 9, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Unnamed: 0 mark model generation_name year \\\n", "count 117927.000000 117927 117927 87842 117927.000000 \n", "unique NaN 23 328 364 NaN \n", "top NaN audi astra gen-8p-2003-2012 NaN \n", "freq NaN 12031 3331 1567 NaN \n", "mean 58963.000000 NaN NaN NaN 2012.925259 \n", "std 34042.736935 NaN NaN NaN 5.690135 \n", "min 0.000000 NaN NaN NaN 1945.000000 \n", "25% 29481.500000 NaN NaN NaN 2009.000000 \n", "50% 58963.000000 NaN NaN NaN 2013.000000 \n", "75% 88444.500000 NaN NaN NaN 2018.000000 \n", "max 117926.000000 NaN NaN NaN 2022.000000 \n", "\n", " mileage vol_engine fuel city province \\\n", "count 1.179270e+05 117927.000000 117927 117927 117927 \n", "unique NaN NaN 6 4427 23 \n", "top NaN NaN Gasoline Warszawa Mazowieckie \n", "freq NaN NaN 61597 7972 22219 \n", "mean 1.409768e+05 1812.057782 NaN NaN NaN \n", "std 9.236936e+04 643.613438 NaN NaN NaN \n", "min 0.000000e+00 0.000000 NaN NaN NaN \n", "25% 6.700000e+04 1461.000000 NaN NaN NaN \n", "50% 1.462690e+05 1796.000000 NaN NaN NaN \n", "75% 2.030000e+05 1995.000000 NaN NaN NaN \n", "max 2.800000e+06 7600.000000 NaN NaN NaN \n", "\n", " price \n", "count 1.179270e+05 \n", "unique NaN \n", "top NaN \n", "freq NaN \n", "mean 7.029988e+04 \n", "std 8.482458e+04 \n", "min 5.000000e+02 \n", "25% 2.100000e+04 \n", "50% 4.190000e+04 \n", "75% 8.360000e+04 \n", "max 2.399900e+06 " ], "text/html": [ "\n", "
\n", " | Unnamed: 0 | \n", "mark | \n", "model | \n", "generation_name | \n", "year | \n", "mileage | \n", "vol_engine | \n", "fuel | \n", "city | \n", "province | \n", "price | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "117927.000000 | \n", "117927 | \n", "117927 | \n", "87842 | \n", "117927.000000 | \n", "1.179270e+05 | \n", "117927.000000 | \n", "117927 | \n", "117927 | \n", "117927 | \n", "1.179270e+05 | \n", "
unique | \n", "NaN | \n", "23 | \n", "328 | \n", "364 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "6 | \n", "4427 | \n", "23 | \n", "NaN | \n", "
top | \n", "NaN | \n", "audi | \n", "astra | \n", "gen-8p-2003-2012 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Gasoline | \n", "Warszawa | \n", "Mazowieckie | \n", "NaN | \n", "
freq | \n", "NaN | \n", "12031 | \n", "3331 | \n", "1567 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "61597 | \n", "7972 | \n", "22219 | \n", "NaN | \n", "
mean | \n", "58963.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2012.925259 | \n", "1.409768e+05 | \n", "1812.057782 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "7.029988e+04 | \n", "
std | \n", "34042.736935 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "5.690135 | \n", "9.236936e+04 | \n", "643.613438 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "8.482458e+04 | \n", "
min | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1945.000000 | \n", "0.000000e+00 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "5.000000e+02 | \n", "
25% | \n", "29481.500000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2009.000000 | \n", "6.700000e+04 | \n", "1461.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2.100000e+04 | \n", "
50% | \n", "58963.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2013.000000 | \n", "1.462690e+05 | \n", "1796.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.190000e+04 | \n", "
75% | \n", "88444.500000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2018.000000 | \n", "2.030000e+05 | \n", "1995.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "8.360000e+04 | \n", "
max | \n", "117926.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2022.000000 | \n", "2.800000e+06 | \n", "7600.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2.399900e+06 | \n", "