Update 'Zadanie1.ipynb'

This commit is contained in:
Tomasz Koszarek 2023-03-25 13:51:01 +01:00
parent 12f01afe73
commit a8b90f7168

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -10,41 +10,42 @@
"output_type": "stream",
"text": [
"Requirement already satisfied: kaggle in \\\\files\\students\\s487174\\.appdata\\python\\python310\\site-packages (1.5.13)\n",
"Requirement already satisfied: python-dateutil in c:\\software\\python3\\lib\\site-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: urllib3 in c:\\software\\python3\\lib\\site-packages (from kaggle) (1.26.14)\n",
"Requirement already satisfied: six>=1.10 in c:\\software\\python3\\lib\\site-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: tqdm in c:\\software\\python3\\lib\\site-packages (from kaggle) (4.64.1)\n",
"Requirement already satisfied: certifi in c:\\software\\python3\\lib\\site-packages (from kaggle) (2022.12.7)\n",
"Requirement already satisfied: requests in c:\\software\\python3\\lib\\site-packages (from kaggle) (2.28.2)\n",
"Requirement already satisfied: tqdm in c:\\software\\python3\\lib\\site-packages (from kaggle) (4.64.1)\n",
"Requirement already satisfied: python-dateutil in c:\\software\\python3\\lib\\site-packages (from kaggle) (2.8.2)\n",
"Requirement already satisfied: six>=1.10 in c:\\software\\python3\\lib\\site-packages (from kaggle) (1.16.0)\n",
"Requirement already satisfied: python-slugify in \\\\files\\students\\s487174\\.appdata\\python\\python310\\site-packages (from kaggle) (8.0.1)\n",
"Requirement already satisfied: certifi in c:\\software\\python3\\lib\\site-packages (from kaggle) (2022.12.7)\n",
"Requirement already satisfied: text-unidecode>=1.3 in \\\\files\\students\\s487174\\.appdata\\python\\python310\\site-packages (from python-slugify->kaggle) (1.3)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\software\\python3\\lib\\site-packages (from requests->kaggle) (3.4)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\software\\python3\\lib\\site-packages (from requests->kaggle) (3.0.1)\n",
"Requirement already satisfied: idna<4,>=2.5 in c:\\software\\python3\\lib\\site-packages (from requests->kaggle) (3.4)\n",
"Requirement already satisfied: colorama in c:\\software\\python3\\lib\\site-packages (from tqdm->kaggle) (0.4.6)\n",
"Requirement already satisfied: pandas in c:\\software\\python3\\lib\\site-packages (1.5.3)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2022.7.1)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: numpy>=1.21.0 in c:\\software\\python3\\lib\\site-packages (from pandas) (1.24.2)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
"Requirement already satisfied: pandas in c:\\software\\python3\\lib\\site-packages (1.5.3)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2022.7.1)\n",
"Requirement already satisfied: numpy>=1.21.0 in c:\\software\\python3\\lib\\site-packages (from pandas) (1.24.2)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas) (2022.7.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)\n",
"Requirement already satisfied: seaborn in \\\\files\\students\\s487174\\.appdata\\python\\python310\\site-packages (0.12.2)\n",
"Requirement already satisfied: pandas>=0.25 in c:\\software\\python3\\lib\\site-packages (from seaborn) (1.5.3)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.17 in c:\\software\\python3\\lib\\site-packages (from seaborn) (1.24.2)\n",
"Requirement already satisfied: pandas>=0.25 in c:\\software\\python3\\lib\\site-packages (from seaborn) (1.5.3)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in c:\\software\\python3\\lib\\site-packages (from seaborn) (3.7.0)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.7)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.38.0)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.0.9)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (9.4.0)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.0.7)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\software\\python3\\lib\\site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\software\\python3\\lib\\site-packages (from pandas>=0.25->seaborn) (2022.7.1)\n",
"Requirement already satisfied: six>=1.5 in c:\\software\\python3\\lib\\site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"poland-cars-for-sale-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)\n",
"Archive: poland-cars-for-sale-dataset.zip\n",
" inflating: Car_sale_ads.csv \n",
@ -58,11 +59,11 @@
}
],
"source": [
"!pip install --user kaggle\n",
"!pip install --user pandas\n",
"%pip install --user kaggle\n",
"%pip install --user pandas\n",
"\n",
"!pip install --user pandas\n",
"!pip install --user seaborn\n",
"%pip install --user pandas\n",
"%pip install --user seaborn\n",
"\n",
"!kaggle datasets download -d bartoszpieniak/poland-cars-for-sale-dataset\n",
"\n",
@ -70,15 +71,12 @@
"\n",
"!wc -l Car_sale_ads.csv\n",
"\n",
"!head -n 5 Car_sale_ads.csv\n",
"\n",
"\n",
"\n"
"!head -n 5 Car_sale_ads.csv"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@ -92,7 +90,7 @@
"Name: Drive, dtype: int64"
]
},
"execution_count": 11,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@ -102,41 +100,50 @@
"cars=pd.read_csv('Car_sale_ads.csv')\n",
"cars\n",
"cars.describe(include='all')\n",
"cars[\"Drive\"].value_counts()\n",
"\n"
"cars[\"Drive\"].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 7,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"Front wheels 658\n",
"4x4 (permanent) 87\n",
"4x4 (attached automatically) 84\n",
"Rear wheels 82\n",
"4x4 (attached manually) 13\n",
"Name: Drive, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"## Split\n",
"\n",
"from sklearn.model_selection import train_test_split\n"
"from sklearn.model_selection import train_test_split\n",
"\n",
"cars_train, cars_test = train_test_split(cars, test_size=1000, random_state=1)\n",
"cars_train[\"Drive\"].value_counts()\n",
"cars_test[\"Drive\"].value_counts()\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'sklearn' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[13], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m cars_train, cars_test \u001b[39m=\u001b[39m sklearn\u001b[39m.\u001b[39mmodel_selection\u001b[39m.\u001b[39mtrain_test_split(cars, test_size\u001b[39m=\u001b[39m\u001b[39m1000\u001b[39m, random_state\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[0;32m 2\u001b[0m iris_train[\u001b[39m\"\u001b[39m\u001b[39mDrive\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mvalue_counts()\n",
"\u001b[1;31mNameError\u001b[0m: name 'sklearn' is not defined"
]
}
],
"outputs": [],
"source": [
"cars_train, cars_test = sklearn.model_selection.train_test_split(cars, test_size=1000, random_state=1)\n",
"iris_train[\"Drive\"].value_counts()"
"from sklearn.model_selection import train_test_split\n",
"#cars_train, cars_test = train_test_split(cars, test_size=50, random_state=1, stratify=cars[\"Drive\"])\n",
"#cars_train[\"Drive\"].value_counts()\n",
"\n"
]
}
],