{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Polish car sale ads: download and train/test split\n",
    "\n",
    "Downloads the Kaggle dataset `bartoszpieniak/poland-cars-for-sale-dataset`, loads `Car_sale_ads.csv` into pandas, and holds out a fixed-size test set.\n",
    "\n",
    "Run the notebook top to bottom on a fresh kernel (Restart Kernel, then Run All). Saved outputs were cleared; running the cells regenerates them."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-off environment setup. Each package is installed exactly once;\n",
    "# scikit-learn is included because the split below imports sklearn.\n",
    "# Versions seen in the previously saved run (Python 3.10.10):\n",
    "# kaggle 1.5.13, pandas 1.5.3, seaborn 0.12.2.\n",
    "%pip install --user kaggle\n",
    "%pip install --user pandas\n",
    "%pip install --user seaborn\n",
    "%pip install --user scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "TEST_SIZE = 1000  # number of rows held out for the test set\n",
    "RANDOM_STATE = 1  # fixed seed so the split is reproducible"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download the data\n",
    "\n",
    "The `kaggle` command-line tool must be able to authenticate (see the Kaggle API documentation for configuring an API token). `unzip -o` overwrites a previously extracted copy, so this section can be re-run safely."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kaggle datasets download -d bartoszpieniak/poland-cars-for-sale-dataset\n",
    "!unzip -o poland-cars-for-sale-dataset.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick sanity check of the raw file: line count and the first few lines.\n",
    "!wc -l Car_sale_ads.csv\n",
    "!head -n 5 Car_sale_ads.csv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and inspect\n",
    "\n",
    "Each inspection step lives in its own cell because only the last expression of a cell is displayed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cars = pd.read_csv('Car_sale_ads.csv')\n",
    "cars.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cars.describe(include='all')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cars[\"Drive\"].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train/test split\n",
    "\n",
    "Hold out `TEST_SIZE` rows as the test set; the remaining rows form the training set."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cars_train, cars_test = train_test_split(\n",
    "    cars, test_size=TEST_SIZE, random_state=RANDOM_STATE\n",
    ")\n",
    "\n",
    "# The two parts must cover the data exactly once.\n",
    "assert len(cars_test) == TEST_SIZE\n",
    "assert len(cars_train) + len(cars_test) == len(cars)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cars_train[\"Drive\"].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cars_test[\"Drive\"].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Notes from the previously saved run\n",
    "\n",
    "`Drive` counts taken from the outputs stored with the earlier version of this notebook (re-run the cells above to refresh them):\n",
    "\n",
    "| Drive | all rows | test split |\n",
    "| --- | ---: | ---: |\n",
    "| Front wheels | 139944 | 658 |\n",
    "| Rear wheels | 18081 | 82 |\n",
    "| 4x4 (permanent) | 16986 | 87 |\n",
    "| 4x4 (attached automatically) | 15420 | 84 |\n",
    "| 4x4 (attached manually) | 2797 | 13 |\n",
    "\n",
    "The test-split counts add up to 924, not 1000: `value_counts()` leaves out missing values by default, so some rows have no `Drive` value.\n",
    "\n",
    "**TODO:** a stratified split was drafted but left disabled:\n",
    "\n",
    "```python\n",
    "cars_train, cars_test = train_test_split(cars, test_size=50, random_state=1, stratify=cars[\"Drive\"])\n",
    "cars_train[\"Drive\"].value_counts()\n",
    "```\n",
    "\n",
    "Before enabling it, decide how rows with a missing `Drive` should be treated; presumably `stratify=` will not accept them as-is (to be confirmed)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}