agabka/.ipynb_checkpoints/projekt (5) (4)-checkpoint.ipynb

3326 lines
764 KiB
Plaintext
Raw Normal View History

2024-04-04 22:23:05 +02:00
{
"cells": [
{
"cell_type": "markdown",
"id": "033f13af",
"metadata": {},
"source": [
"This dataset contains information on patients with lung cancer, including their age, gender, air pollution exposure, alcohol use, dust allergy, occupational hazards, genetic risk, chronic lung disease, balanced diet, obesity, smoking, passive smoking, chest pain, coughing of blood, fatigue, weight loss, shortness of breath, wheezing, swallowing difficulty, clubbing of finger nails, and snoring.\n",
"\n",
"https://www.kaggle.com/datasets/thedevastator/cancer-patients-and-air-pollution-a-new-link/data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "7ce53ad1",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import plotly.figure_factory as ff\n",
"import seaborn as sns\n",
"sns.set()\n",
"import plotly.express as px\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3b9fd854",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: plotnine in c:\\users\\hp\\anaconda3\\lib\\site-packages (0.12.4)\n",
"Requirement already satisfied: matplotlib>=3.6.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (3.8.0)\n",
"Requirement already satisfied: mizani<0.10.0,>0.9.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.9.3)\n",
"Requirement already satisfied: numpy>=1.23.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (1.26.0)\n",
"Requirement already satisfied: pandas>=1.5.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (2.1.1)\n",
"Requirement already satisfied: patsy>=0.5.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.5.5)\n",
"Requirement already satisfied: scipy>=1.5.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (1.11.4)\n",
"Requirement already satisfied: statsmodels>=0.14.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from plotnine) (0.14.0)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (1.2.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (4.25.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (1.4.4)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (23.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (10.0.1)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (3.0.9)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.6.0->plotnine) (2.8.2)\n",
"Requirement already satisfied: tzdata in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mizani<0.10.0,>0.9.0->plotnine) (2023.3)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas>=1.5.0->plotnine) (2023.3.post1)\n",
"Requirement already satisfied: six in c:\\users\\hp\\anaconda3\\lib\\site-packages (from patsy>=0.5.1->plotnine) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.3.2 -> 24.0\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
}
],
"source": [
"%pip install plotnine==0.12.4"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "6d369f6b",
"metadata": {},
"outputs": [],
"source": [
"import plotnine"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "73edef6d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Patient Id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>Air Pollution</th>\n",
" <th>Alcohol use</th>\n",
" <th>Dust Allergy</th>\n",
" <th>OccuPational Hazards</th>\n",
" <th>Genetic Risk</th>\n",
" <th>chronic Lung Disease</th>\n",
" <th>Balanced Diet</th>\n",
" <th>...</th>\n",
" <th>Fatigue</th>\n",
" <th>Weight Loss</th>\n",
" <th>Shortness of Breath</th>\n",
" <th>Wheezing</th>\n",
" <th>Swallowing Difficulty</th>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <th>Frequent Cold</th>\n",
" <th>Dry Cough</th>\n",
" <th>Snoring</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>P1</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P10</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>Medium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P100</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P1000</td>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P101</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>High</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n",
"index \n",
"0 P1 33 1 2 4 5 \n",
"1 P10 17 1 3 1 5 \n",
"2 P100 35 1 4 5 6 \n",
"3 P1000 37 1 7 7 7 \n",
"4 P101 46 1 6 8 7 \n",
"\n",
" OccuPational Hazards Genetic Risk chronic Lung Disease \\\n",
"index \n",
"0 4 3 2 \n",
"1 3 4 2 \n",
"2 5 5 4 \n",
"3 7 6 7 \n",
"4 7 7 6 \n",
"\n",
" Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n",
"index ... \n",
"0 2 ... 3 4 2 \n",
"1 2 ... 1 3 7 \n",
"2 6 ... 8 7 9 \n",
"3 7 ... 4 2 3 \n",
"4 7 ... 3 2 4 \n",
"\n",
" Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n",
"index \n",
"0 2 3 1 \n",
"1 8 6 2 \n",
"2 2 1 4 \n",
"3 1 4 5 \n",
"4 1 4 2 \n",
"\n",
" Frequent Cold Dry Cough Snoring Level \n",
"index \n",
"0 2 3 4 Low \n",
"1 1 7 2 Medium \n",
"2 6 7 2 High \n",
"3 6 7 5 High \n",
"4 4 2 3 High \n",
"\n",
"[5 rows x 25 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE: machine-specific absolute path; edit DATA_PATH to point at your copy of the Kaggle CSV.\n",
"DATA_PATH = r'C:\\Users\\HP\\Desktop\\podyplomówka\\cancer_patient_data_sets.csv'\n",
"\n",
"# index_col=0 uses the first CSV column as the row index.\n",
"dane = pd.read_csv(DATA_PATH, index_col=0)\n",
"dane.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1831fdd7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 1000 entries, 0 to 999\n",
"Data columns (total 25 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Patient Id 1000 non-null object\n",
" 1 Age 1000 non-null int64 \n",
" 2 Gender 1000 non-null int64 \n",
" 3 Air Pollution 1000 non-null int64 \n",
" 4 Alcohol use 1000 non-null int64 \n",
" 5 Dust Allergy 1000 non-null int64 \n",
" 6 OccuPational Hazards 1000 non-null int64 \n",
" 7 Genetic Risk 1000 non-null int64 \n",
" 8 chronic Lung Disease 1000 non-null int64 \n",
" 9 Balanced Diet 1000 non-null int64 \n",
" 10 Obesity 1000 non-null int64 \n",
" 11 Smoking 1000 non-null int64 \n",
" 12 Passive Smoker 1000 non-null int64 \n",
" 13 Chest Pain 1000 non-null int64 \n",
" 14 Coughing of Blood 1000 non-null int64 \n",
" 15 Fatigue 1000 non-null int64 \n",
" 16 Weight Loss 1000 non-null int64 \n",
" 17 Shortness of Breath 1000 non-null int64 \n",
" 18 Wheezing 1000 non-null int64 \n",
" 19 Swallowing Difficulty 1000 non-null int64 \n",
" 20 Clubbing of Finger Nails 1000 non-null int64 \n",
" 21 Frequent Cold 1000 non-null int64 \n",
" 22 Dry Cough 1000 non-null int64 \n",
" 23 Snoring 1000 non-null int64 \n",
" 24 Level 1000 non-null object\n",
"dtypes: int64(23), object(2)\n",
"memory usage: 203.1+ KB\n"
]
}
],
"source": [
"dane.info()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "af7da17c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>Age</th>\n",
" <td>1000.0</td>\n",
" <td>37.174</td>\n",
" <td>12.005493</td>\n",
" <td>14.0</td>\n",
" <td>27.75</td>\n",
" <td>36.0</td>\n",
" <td>45.0</td>\n",
" <td>73.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Gender</th>\n",
" <td>1000.0</td>\n",
" <td>1.402</td>\n",
" <td>0.490547</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Air Pollution</th>\n",
" <td>1000.0</td>\n",
" <td>3.840</td>\n",
" <td>2.030400</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Alcohol use</th>\n",
" <td>1000.0</td>\n",
" <td>4.563</td>\n",
" <td>2.620477</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dust Allergy</th>\n",
" <td>1000.0</td>\n",
" <td>5.165</td>\n",
" <td>1.980833</td>\n",
" <td>1.0</td>\n",
" <td>4.00</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>OccuPational Hazards</th>\n",
" <td>1000.0</td>\n",
" <td>4.840</td>\n",
" <td>2.107805</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Genetic Risk</th>\n",
" <td>1000.0</td>\n",
" <td>4.580</td>\n",
" <td>2.126999</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>chronic Lung Disease</th>\n",
" <td>1000.0</td>\n",
" <td>4.380</td>\n",
" <td>1.848518</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Balanced Diet</th>\n",
" <td>1000.0</td>\n",
" <td>4.491</td>\n",
" <td>2.135528</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Obesity</th>\n",
" <td>1000.0</td>\n",
" <td>4.465</td>\n",
" <td>2.124921</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Smoking</th>\n",
" <td>1000.0</td>\n",
" <td>3.948</td>\n",
" <td>2.495902</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Passive Smoker</th>\n",
" <td>1000.0</td>\n",
" <td>4.195</td>\n",
" <td>2.311778</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Chest Pain</th>\n",
" <td>1000.0</td>\n",
" <td>4.438</td>\n",
" <td>2.280209</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Coughing of Blood</th>\n",
" <td>1000.0</td>\n",
" <td>4.859</td>\n",
" <td>2.427965</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Fatigue</th>\n",
" <td>1000.0</td>\n",
" <td>3.856</td>\n",
" <td>2.244616</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Weight Loss</th>\n",
" <td>1000.0</td>\n",
" <td>3.855</td>\n",
" <td>2.206546</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Shortness of Breath</th>\n",
" <td>1000.0</td>\n",
" <td>4.240</td>\n",
" <td>2.285087</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Wheezing</th>\n",
" <td>1000.0</td>\n",
" <td>3.777</td>\n",
" <td>2.041921</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Swallowing Difficulty</th>\n",
" <td>1000.0</td>\n",
" <td>3.746</td>\n",
" <td>2.270383</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <td>1000.0</td>\n",
" <td>3.923</td>\n",
" <td>2.388048</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Frequent Cold</th>\n",
" <td>1000.0</td>\n",
" <td>3.536</td>\n",
" <td>1.832502</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Dry Cough</th>\n",
" <td>1000.0</td>\n",
" <td>3.853</td>\n",
" <td>2.039007</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>4.0</td>\n",
" <td>6.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>Snoring</th>\n",
" <td>1000.0</td>\n",
" <td>2.926</td>\n",
" <td>1.474686</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" count mean std min 25% 50% 75% \\\n",
"Age 1000.0 37.174 12.005493 14.0 27.75 36.0 45.0 \n",
"Gender 1000.0 1.402 0.490547 1.0 1.00 1.0 2.0 \n",
"Air Pollution 1000.0 3.840 2.030400 1.0 2.00 3.0 6.0 \n",
"Alcohol use 1000.0 4.563 2.620477 1.0 2.00 5.0 7.0 \n",
"Dust Allergy 1000.0 5.165 1.980833 1.0 4.00 6.0 7.0 \n",
"OccuPational Hazards 1000.0 4.840 2.107805 1.0 3.00 5.0 7.0 \n",
"Genetic Risk 1000.0 4.580 2.126999 1.0 2.00 5.0 7.0 \n",
"chronic Lung Disease 1000.0 4.380 1.848518 1.0 3.00 4.0 6.0 \n",
"Balanced Diet 1000.0 4.491 2.135528 1.0 2.00 4.0 7.0 \n",
"Obesity 1000.0 4.465 2.124921 1.0 3.00 4.0 7.0 \n",
"Smoking 1000.0 3.948 2.495902 1.0 2.00 3.0 7.0 \n",
"Passive Smoker 1000.0 4.195 2.311778 1.0 2.00 4.0 7.0 \n",
"Chest Pain 1000.0 4.438 2.280209 1.0 2.00 4.0 7.0 \n",
"Coughing of Blood 1000.0 4.859 2.427965 1.0 3.00 4.0 7.0 \n",
"Fatigue 1000.0 3.856 2.244616 1.0 2.00 3.0 5.0 \n",
"Weight Loss 1000.0 3.855 2.206546 1.0 2.00 3.0 6.0 \n",
"Shortness of Breath 1000.0 4.240 2.285087 1.0 2.00 4.0 6.0 \n",
"Wheezing 1000.0 3.777 2.041921 1.0 2.00 4.0 5.0 \n",
"Swallowing Difficulty 1000.0 3.746 2.270383 1.0 2.00 4.0 5.0 \n",
"Clubbing of Finger Nails 1000.0 3.923 2.388048 1.0 2.00 4.0 5.0 \n",
"Frequent Cold 1000.0 3.536 1.832502 1.0 2.00 3.0 5.0 \n",
"Dry Cough 1000.0 3.853 2.039007 1.0 2.00 4.0 6.0 \n",
"Snoring 1000.0 2.926 1.474686 1.0 2.00 3.0 4.0 \n",
"\n",
" max \n",
"Age 73.0 \n",
"Gender 2.0 \n",
"Air Pollution 8.0 \n",
"Alcohol use 8.0 \n",
"Dust Allergy 8.0 \n",
"OccuPational Hazards 8.0 \n",
"Genetic Risk 7.0 \n",
"chronic Lung Disease 7.0 \n",
"Balanced Diet 7.0 \n",
"Obesity 7.0 \n",
"Smoking 8.0 \n",
"Passive Smoker 8.0 \n",
"Chest Pain 9.0 \n",
"Coughing of Blood 9.0 \n",
"Fatigue 9.0 \n",
"Weight Loss 8.0 \n",
"Shortness of Breath 9.0 \n",
"Wheezing 8.0 \n",
"Swallowing Difficulty 8.0 \n",
"Clubbing of Finger Nails 9.0 \n",
"Frequent Cold 7.0 \n",
"Dry Cough 7.0 \n",
"Snoring 7.0 "
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane.describe().T"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "a043ec73",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Patient Id', 'Age', 'Gender', 'Air Pollution', 'Alcohol use',\n",
" 'Dust Allergy', 'OccuPational Hazards', 'Genetic Risk',\n",
" 'chronic Lung Disease', 'Balanced Diet', 'Obesity', 'Smoking',\n",
" 'Passive Smoker', 'Chest Pain', 'Coughing of Blood', 'Fatigue',\n",
" 'Weight Loss', 'Shortness of Breath', 'Wheezing',\n",
" 'Swallowing Difficulty', 'Clubbing of Finger Nails', 'Frequent Cold',\n",
" 'Dry Cough', 'Snoring', 'Level'],\n",
" dtype='object')"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dane.columns"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9dac40a9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAGZCAYAAACwkvfNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABX5klEQVR4nO3dd3wUdf7H8dfMbnrvlRog9N6lN0UFFevJ6Xl6Vk5P7PoTTz27IiJYTk9Rz96RE7HSEnrvHdJ775udmd8fkWgkoYRsZnfzeT4ePB6wOzvz2QX2ne93vkUxDMNACCGEcBDV7AKEEEK4NwkaIYQQDiVBI4QQwqEkaIQQQjiUBI0QQgiHkqARQgjhUBI0QgghHEqCRgghhENJ0AjhQM42H9rZ6hFtgwSNaFXXXHMNiYmJDX51796dQYMGcfnll/Ptt982OH7ChAk88MADp33+Mz2+KV9++SWJiYmkp6cDsGDBAhITE0/79dnZ2dx8881kZGSc9Lj169eTmJjI+vXrT/s6f3zN6Xrttdd466236v98pu9JiOayml2AaHt69uzJP//5z/o/a5pGdnY277zzDnfddRcBAQGMGTMGgIULF+Lv729WqfUuv/xyRo8efdrHr1mzhhUrVjBnzpyTHterVy8++eQTunTpctrnbs5rAF566SX+/ve/1//5TN+TEM0lQSNanb+/P/379z/h8bFjxzJixAi++OKL+qDp2bNnK1fXuOjoaKKjo1v8vE19Fi39msY46j0J8UfSdSachqenJx4eHg0e+2NX2NKlS5k+fTp9+/Zl+PDh3HPPPeTm5jZ5zs8//5zu3buzYMGCJo/RdZ1XX32VcePG0a9fP2677TZKSkoaHPPHbqa0tDRuvfVWhg0bRr9+/bjyyitZuXIlUNft9uCDDwIwceLE+vonTJjAU089xV/+8hcGDhzII4880mQ32E8//cS5555Lnz59uPzyy1m7dm39c6fb3ZaYmFj/vo8/v3DhwvrfN/a6pUuXMmPGDAYMGMA555zDI4880uCzWLBgAZMnT2bFihVMmzaN3r17c+655/LVV181+fkKIUEjWp1hGNjt9vpfNTU1pKSk8PDDD1NRUcFFF13U6Os2b97MPffcw5QpU3jzzTd58MEHWbduHXfffXejxy9dupQ5c+Zwyy23cPvttzdZz/PPP88rr7zCpZdeysKFCwkJCWHu3LlNHq/rOjfffDOVlZU899xzvPrqqwQHB3PbbbeRkpLCuHHjuPXWW4G6L/bbbrut/rUffPBBfQA09T4BHnroIa699loWLFiAn58fN954I4cOHWry+FP55JNPALjsssvqf/9Hr776KrNnz6Zfv368/PLLzJo1i++//55rrrmG6urq+uPy8vJ4/PHHufbaa3njjTeIj4/ngQce4PDhw82uT7g36ToTrW7jxo306tWrwWOKotCtWzfmz5/PhAkTGn3d5s2b8fLy4sYbb8TLywuA4OBgdu7ciWEYKIpSf+zy5cu57777uOmmm7jzzjubrKW0tJT//ve/XHvttfVhNHr0aHJycli9enWjrykoKODw4cPccsstjB07FoC+ffuycOFCampq6NChA+3btwegR48exMfH1782MjKSBx54AFWt+xmvqRv6//znP7ngggsAGDFiBBMnTuS11147aQCezPGutujo6Ea73UpKSnjttde4/PLLG9w/69atGzNnzuTLL7/k6quvBqCqqoonn3ySESNGANCxY0fGjx/PypUrSUhIaFZ9wr1J0IhW16tXLx577DEAcnJymD9/PrW1tcybN++kX1RDhgxh3rx5TJs2jalTpzJmzBhGjRpV/2V/3O7du1m6dCmRkZH84x//OGkt27Zto7a2lokTJzZ4fOrUqU0GTXh4OF26dGHOnDmsWbOmvo7j3WUnk5CQUB8yTbFYLEyZMqX+z15eXowZM4bly5ef8vzNtW3bNmw2G9OmTWvw+ODBg4mLi2P9+vX1QQ
M0CKvj93kqKysdVp9wbdJ1Jlqdn58fffr0oU+fPkyaNIl33nmH8vJyrr/+egoLC5t83YABA3jjjTdo164db731FldffTVjx47l3XffbXDcgQMHGDlyJBkZGbz//vsnreX4/YfQ0NAGj0dERDT5GkVRePvtt7nkkktYvXo1s2fPZuTIkdx5550UFxef9Hrh4eEnfR7qWml/vFcVFhZGaWnpKV/bXMc/h8bqCw8Pp6ysrMFjPj4+9b8/HpwyR0c0RYJGmC4sLIxHHnmE7OxsnnzyyZMeO3r0aN566y02btzI66+/TteuXXnqqafYvn17/TGjRo3i9ddf58ILL2TevHlkZmY2eb6QkBCgrjvs904VGFFRUTz66KMkJSXx9ddfc8MNN/DDDz8wb968U7zbUysrKzvhSzs/P/+EMDzueJehpmn1j1VUVJzRNYOCguqv80d5eXn1n5MQzSFBI5zClClTGD16NP/73/+avG/x7LPPctlll2EYBj4+PowfP577778fgKysrPrjjrdGHnzwQaxWK4888kiT1x0wYADe3t4sW7asweMn66baunUrI0eOZMeOHSiKQo8ePZg9ezbdunUjOzsb4JTdYydjs9lYt25d/Z8rKipYsWIFw4YNa/T44/OMfv8ZbNmy5YTjTlZTv3798PT0ZMmSJQ0e37RpE5mZmQwcOPCM3oMQvydBI5zGQw89hIeHB0888QR2u/2E50eMGMGuXbt44IEHSE5OZsWKFTzxxBMEBwczfPjwE44PDw9n9uzZrF69msWLFzd6TT8/P2677TY+/PBDXnjhBZKSknjyySdPGjQ9e/bE29ub++67j2+//Zb169czb9489u7dy7nnngtAYGAgAD/++OMZj8by8PDgoYceYsmSJSxfvpy//e1vVFdXNxi99nvH71Edv2f05Zdf8s9//hM/P78GxwUGBrJ161Y2btx4QospODiYm266ic8++4zHHnuMpKQkPv74Y26//Xa6dOnCjBkzzug9CPF7EjTCaXTu3JlrrrmGAwcONHpvZcyYMbzwwgscPHiQv//979x11134+Pjw3nvvERwc3Og5r7rqKvr27ctTTz3V5P2fm2++mYceeohly5Zx6623sn///vqWUmO8vLx4++236dq1K08++SQ33HADP//8M48//nj9F/KwYcMYOXIkc+fO5dlnnz2jzyEoKIh7772XefPmcccdd2CxWHj//ffp3Llzo8d36tSJZ599lszMTG666Sbeffdd/vWvfxEZGdnguFtuuYWdO3dy4403Nmj9HHf77bfz6KOPsmHDBm655RYWLlzIeeedx4cfftjgnowQZ0ox5A6eEEIIB5IWjRBCCIeSoBFCCOFQEjRCCCEcSoJGCCGEQ0nQCCGEcCgJGiGEEA4lQSOEEMKhJGiEEEI4lASNEEIIh5KgEUII4VASNEIIIRxKgkYIIYRDSdAIIYRwKAkaIYQQDiVBI4QQwqEkaIQQQjiUBI0QQgiHkqARQgjhUBI0QgghHEqCRgghhENJ0AghhHAoCRohhBAOJUEjhBDCoSRohBBCOJQEjRBCCIeSoBFCCOFQEjRCCCEcSoJGCCGEQ0nQCCGEcCgJGiGEEA4lQSOEEMKhJGiEEEI4lASNEEIIh5KgEUII4VASNEIIIRxKgkYIIYRDSdAIIYRwKAkaIYQQDiVBI4QQwqEkaIQQQjiUBI0QQgiHkqARQgjhUBI0QgghHMpqdgFCuAJdN9ANA8MAVQWL2nI/o2m6joFed25FwaJaWuzcQjgDCRohALumoyi/BYhhGJSU2ygur6G0vIaSChtllTbKKmyUVdbW/f7XX+WVtVTb7Oj6b4H05/O6M3VkJ25d8hA1dhuqoqIqCl5WL/w8fPD99Zef52+/9/Xwwd/Tj3C/UCL9wgnxCcL6u9DRdA0DA4tiQVEUsz4qIc6YBI1oMwzDQNMNrJbfWiPFZTWk55WRnlNOZn4FWfnlZOZVkF1Qgc2uN/tax19bYauk2l
7TrHMoKAR5BxDhF0a4bwjhvmFE+IUSExBJx+B2BHkHAKAbOoZhSEtIOC0JGuG27JqORVVQFIVqm539KUXsPVbI0cw
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"level_counts = dane['Level'].value_counts()\n",
"ax = level_counts.plot(kind='pie', autopct='%1.1f%%', startangle=90)\n",
"# Title the Axes directly so `ax` keeps referring to the plot, not to the title's Text object.\n",
"ax.set_title('Risk distribution')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "b45bd771",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABDYAAAHcCAYAAAAtNCoPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAACBUUlEQVR4nO3dd3gU5dfG8Xs3IUAgIaGFLghSJYTeRDpKUwNiA6QJSBWQImKhiIDSmyBdQEEEERG7gIKAUtUfTSIgLdRAgNCSnfcP3qwsCSTZbM/3c11cujOzc84zz8zO7MnMsybDMAwBAAAAAAB4IbO7EwAAAAAAALAXhQ0AAAAAAOC1KGwAAAAAAACvRWEDAAAAAAB4LQobAAAAAADAa1HYAAAAAAAAXovCBgAAAAAA8FoUNgAAAAAAgNeisAEAgAczDMPdKXhEDp7ElduDbQ8AQMoobAAAYKf27durVKlS1n+lS5dWxYoV1apVKy1evFgJCQk2yzdo0ECvvfZaqtf/448/asiQISku99prr6lBgwZ2x7mXmzdvasyYMfryyy/vGcsTjB8/XtWrV1dERIRWr17t1Fg7duxQ9+7dra+PHz+uUqVKadWqVQ6Pldr+T7Rq1SqVKlVK27Ztc3guAAB4Mn93JwAAgDcrW7as3n77bUlSQkKCLl26pI0bN+rdd9/Vjh07NGnSJJlMJknS9OnTlT179lSve+HChalarmfPnnrxxRfTnHtKzpw5o4ULF2rMmDFOj2WvgwcPas6cOXrmmWf05JNP6sEHH3RqvBUrVujQoUPW13nz5tXy5ctVpEgRh8dKbf8nStzPEv8LAEBGQWEDAIB0yJ49uyIiImymNWjQQMWKFdOYMWPUoEEDPfHEE5JuF0GcwRlfqj0hVmpcvHhRktS8eXNVqVLF5fEDAgKS9L+75MmTR5IUFhbm5kwAAHAtHkUBAMAJ2rdvr7x582rZsmXWaXc/IrJu3To98cQTCg8PV40aNTRw4ECdOXPG+v7ffvtNv/32m/Xxgm3btqlUqVJatmyZ6tevr1q1amnTpk3JPh5y69YtvfPOO6pataqqVq2qIUOG6MKFC9b5yb3nzscqjh8/roYNG0qShg4dal327vclJCRo6dKlatmypcLDw1WvXj2NHz9eN27csInVsWNHrVy5Uo899pgefvhhPfHEE9q4cWOK23HdunVq1aqVKlasqNq1a+utt97SpUuXJEnTpk1T+/btJUkdOnS45yMyidtt06ZNatu2rcLDw9W4cWMtWbLEZrkLFy5oxIgRql+/vh5++GFVq1ZNvXr10vHjx63t+Pzzz3XixAmb7XT3oygnT57UgAEDVK1aNVWoUEEdOnTQ3r17k2znr7/+Wn379lXFihVVtWpVDRs2TFevXpWUfP9L0uLFi/X444+rfPnyqlOnjoYPH64rV65Ikh566CEFBgbet/i0f/9+9e7dWzVq1FC5cuVUp04dvfPOO7p+/bp1mStXruitt95SzZo1VbFiRfXv318LFy5UqVKlbNb1ww8/qFWrVipfvrxq166td955R3FxcffpTQAAnIPCBgAATuDn56eaNWvqjz/+UHx8fJL5O3bs0MCBA9WkSRPNmTNHQ4cO1datW/Xqq69Kkt5++22VLVtWZcuW1fLly1WuXDnreydNmqQhQ4ZoyJAh97xb4Ouvv9Zff/2lsWPHavDgwdqwYYN69uyZ6vzz5s2r6dOnS5J69Ohh/f+7vfXWW3r33XfVoEEDffDBB2rbtq2WLFminj172gx8+ddff2nevHnq27evZsyYIX9/f/Xt29dapEjOzJkz1b9/f1WoUEFTp05Vr1699O2336p9+/a6fv262rRpo7feesuax71yTNS/f3+VLVtWM2bMUO3atTVq1CgtXrxY0u1BOrt3767Nmzfr1Vdf1bx589SzZ0/9+uuv1hg9e/ZU3bp1lSdPHi1fvlz16tVLEuPChQt67rnn9L///U9vvvmmJk
yYIIvForZt2yoqKspm2bffflsFCxbUzJkz9dJLL2nlypWaNWuWdd7d/f/VV19p3Lhxatu2rebNm6devXrpiy++0DvvvCPp9p0au3btuuejKGfOnFHbtm117do1jR07VnPmzFHTpk21ePFim8deevXqpa+//lp9+vTRpEmTdPXqVU2YMMFmXV9++aV69eqlBx98UDNmzFDv3r21Zs2aJP0OAIAr8CgKAABOkjt3bt26dUsXL15U7ty5bebt2LFDmTNnVteuXZU5c2ZJUkhIiP78808ZhqESJUpYx+O4u3jx3HPP6fHHH79v7ODgYM2dO9e6jtDQUPXq1UubNm3SI488kmLuAQEBKlOmjKTbj58k9xjNoUOH9Nlnn6lfv37q0aOHJKl27drKmzevBg8erJ9//ll169aVJF2+fFmrVq2y3k0QGBiodu3aaevWrXrssceSrPvSpUv64IMP1KZNG+sYJpJUsmRJtW3bVqtWrdILL7ygEiVKSJJKlCiR4qM+jRo10rBhwyRJderU0ZkzZ6zFmLNnzypr1qwaMmSI9ZGW6tWr6/jx49a7booUKaKcOXPaPH5y9x0KixYt0sWLF/XJJ5+oYMGCkqRHH31UzZo105QpUzR16lTrsnXr1rUODlqzZk1t3rxZGzZs0Kuvvpps/2/btk0FCxZU27ZtZTabVa1aNQUGBiomJua+7U508OBBlSlTRlOmTLGuu1atWtqyZYt+//13vfzyy9qyZYu2bt2qadOmqUmTJtb8W7ZsaR1bxDAMjR8/XnXq1NH48eOt6y9atKg6duyojRs3Jlv0AQDAWbhjAwAAJ0vuL+hVq1bV9evX1bJlS02aNEk7duzQI488ot69e6c4+OPdjwQkp27dujYDlTZo0ECZMmXSr7/+mvYG3MNvv/0mSWrZsqXN9ObNm8vPz8/m1zly5sxp84hEvnz5JEnXrl1Ldt27d+/WzZs3k6y7SpUqKliwoF2//PHkk0/avG7SpInOnz+vw4cPKywsTB999JGqVKmikydPasuWLVqyZIl27typW7dupTrGli1bVKZMGYWFhSk+Pl7x8fEym8169NFHk2z7uwtW+fLlu++jHDVq1NCRI0fUqlUrzZw5U3v37lXLli3VoUOHVOX2yCOPaMmSJcqcObMOHz6s9evXa9asWbpw4YJu3rwpSdq6dasyZcqkRo0aWd9nNpvVtGlT6+t//vlH0dHRatCggbWN8fHxqlq1qrJnz67NmzenKh8AAByFOzYAAHCS06dPK0uWLAoJCUkyr2LFivrwww+1cOFCzZs3T7NmzVKePHnUtWvXFL+o5sqVK8XYd98hYjabFRISotjY2DS14X4SHyNJHLQykb+/v0JDQ3X58mXrtKxZs9osk1i8sVgs91333e1InHbnulMrb968Nq8Tt2PiNlmzZo0mTpyoU6dOKSQkRKVLl1aWLFnSFOPixYs6evSozaNDd7qzkHP3NjGbzfd9jKNZs2ayWCz6+OOPNX36dE2ZMkUFCxbUq6++qubNm6eYm8Vi0cSJE7V06VLFxcUpf/78Cg8Pt94xJEkxMTEKCQmR2Wz7t687+yFxwNYRI0ZoxIgRSeIkjhMDAICrUNgAAMAJEhIS9Ntvv6lSpUry8/NLdpk6deqoTp06unbtmrZu3aqPPvpI7777riIiIlShQoV0xb+7gJGQkKCYmBjrl3mTyaSEhASbZdI68GOOHDkkSWfPnlWhQoWs02/duqWYmBiFhobak7rNus+dO6fixYvbzDt79qwKFy6c5nUmfiFPdP78eUm3Cxzbt2/XkCFD1K5dO3Xp0sV6R8l7772nHTt2pDpGUFCQqlWrpsGDByc7PyAgIM1536lFixZq0aKFLl++rE2bNmnOnDkaNGiQqlSpkuKvoSQW0oYPH67HHntMQUFBkqSnn37aukxYWJhiYmJksVhsihuJ20q6/ZiTJA0ePFjVqlVLEiex7wAAcBUeRQEAwAmWLVumM2fO6Pnnn092/r
hx4/T000/LMAxlzZpV9evXt463cOrUKUlK8lfztPj1119tBi399ttvFR8fr+rVq0uSsmXLppiYGJtfL9m5c6fNOu5
"text/plain": [
"<Figure size 1300x500 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"age_counts = dane['Age'].value_counts().sort_index(ascending=False)\n",
"\n",
"# Bar chart of patient counts per age; the x-axis runs from the oldest age to the youngest.\n",
"ax = age_counts.plot(kind='bar', figsize=(13, 5), width=1)\n",
"ax.set(xlabel='Age', ylabel='Count', title=\"Distribution of patients' age\")\n",
"\n",
"# Write each count just above its bar.\n",
"for i, value in enumerate(age_counts):\n",
"    ax.text(i, value + 0.1, str(value), ha='center', va='bottom')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "78391055",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"37.174"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mean_age = dane['Age'].mean()\n",
"mean_age"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "966e57b9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAioAAAHwCAYAAACIfURnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABKD0lEQVR4nO3deVxUZf//8TcDIiAiSAouqYgpmYqaKKaEabaIluZdWuLtriXJTyy0cq9MLZJwKxfMrcJS626xus02TXPB7S40l9uoVCQVQZFFZub3h1/mbgJLEJmDvJ6Phw+Zc64553PmzGHeXOc6Z5ysVqtVAAAABmRydAEAAABXQlABAACGRVABAACGRVABAACGRVABAACGRVABAACGRVABAACGRVABAACGRVAByoAR7ptohBqMpDxfD1574PohqOCGN3DgQDVr1sz2LygoSG3atNFDDz2kVatWyWw227Xv2rWrnnnmmate/qZNmzRhwoS/bffMM8+oa9eupV7PleTn52vmzJn66KOPrrguI4iLi1OHDh3UunVrffDBB9d1XcnJyRo1apTt8W+//aZmzZpp/fr1Zb6uq93/hdavX69mzZpp+/btZV5LWWrWrJnmzZvn6DIAuTi6AKA8NG/eXFOnTpUkmc1mZWZm6ptvvtFLL72k5ORkxcfHy8nJSZI0f/58eXp6XvWyly9fflXtRo8erX/+858lrv3vpKena/ny5Zo5c+Z1X1dpHTp0SEuWLNEjjzyiBx98UI0bN76u63vvvfd05MgR2+PatWtrzZo1atCgQZmv62r3f6HC91nh/wD+GkEFlYKnp6dat25tN61r164KCAjQzJkz1bVrVz3wwAOSLoea6+F6fEgaYV1X49y5c5KkiIgItWvXrtzX7+rqWmT/O0qtWrUkSX5+fg6uBKgYOPWDSm3gwIGqXbu2kpKSbNP+fEpmw4YNeuCBB9SqVSuFhobq6aefVnp6uu35O3bs0I4dO2zd+du3b1ezZs2UlJSku+66S3fccYe2bNlS7OmYS5cu6cUXX1RISIhCQkI0YcIEnT171ja/uOf88TTGb7/9pm7dukmSnn32WVvbPz/PbDbrrbfeUq9evdSqVSt16dJFcXFxysvLs1vX4MGDtW7dOt17771q0aKFHnjgAX3zzTd/+zpu2LBBDz30kNq0aaNOnTppypQpyszMlCTNmzdPAwcOlCQNGjToiqekCl+3LVu2aMCAAWrVqpW6d++u1atX27U7e/aspk+frrvuukstWrRQ+/btFRUVpd9++822He+//76OHz9u9zr9+dTPiRMnNG7cOLVv317BwcEaNGiQUlJSirzOn376qaKjo9WmTRuFhIRo4sSJys7OllT8/pekVatW6b777lPLli0VFhamadOm6cKFC5KkW265RR4eHn8ZJo8ePaoRI0aobdu2uuOOOxQfH69nn33W9jpKksVi0eLFi9W9e3e1aNFC9957r1atWmW3nIEDB2rixIlavHixunTpopYtW6p///7at2+fXbsdO3aoX79+Cg4O1r333qutW7cWqSkvL08vv/yywsPD1aJFC/Xq1UsbNmywa9O1a1e99NJLGjRokNq2baspU6ZccRuBq0WPCio1Z2dndezYURs2bFBBQYFcXOwPieTkZD399NMaPXq0QkJClJaWpldeeUVPPfWUVq1apalTpyo2NlaSNHXqVDVp0kQ//vijJCk+Pl7Tp09XXl6eWrdurY8//rjI+j/99FO1atVKs2bN0tmzZxUXF6fU1FS74PRXateurfnz5+vJJ5/UE088oXvuuafYdlOmTNEHH3yg4cOHq3379kpJSdGCBQt04MABLV261HYa4ocfflB6erqio6Pl6emphIQERUdH69tvv1WNGjWKXfbChQuVkJCgxx57TDExMfr111+VkJCgvXv36t1339XDDz+smjVr6vnnn9eUKVPUpk2bv9ymmJgY9e7dW48//rg2bdqkF154QV
arVQMHDpTVatWoUaOUmZmpp556SrVq1dKBAweUkJCgKVOmaNmyZRo9erTOnj2rlJQUzZ8/Xw0aNNDFixft1nH27Fn1799f7u7umjx5stzd3bVixQoNGDBAa9euVWBgoK3t1KlT1bdvXy1cuFD79+9XfHy8atasqaeeeqrY/f/JJ59o9uzZmjBhgpo1a6b//ve/mj17tnJzczVr1iz5+flpz549V9z+s2fPKjIyUr6+vpo5c6bMZrMSEhJ04sQJu16hadOmaf369Ro1apTatGmjnTt36qWXXlJWVpaioqJs7T7//HMFBgZq0qRJslqtmj17tqKjo/Xll1/K2dlZP/74o4YOHaoOHTrY1jNu3Di7mqxWq6KiorR7925FR0crMDBQGzduVExMjPLz89W7d29b27feeksDBgzQyJEj5ebm9pf7GrgaBBVUejfddJMuXbqkc+fO6aabbrKbl5ycrKpVq2rEiBGqWrWqJMnb21v/+c9/ZLVa1aRJE9t4lj+fWujfv7/uu+++v1y3l5eXli5daluGj4+PoqKitGXLFnXu3Plva3d1ddWtt94q6fLpnuJOWx05ckRr167V2LFj9cQTT0iSOnXqpNq1a2v8+PH69ttvFR4eLkk6f/681q9fb/tr38PDQ5GRkfr+++917733Fll2ZmamXn/9dT388MO2MUCS1LRpUw0YMEDr16/XY489piZNmkiSmjRp8ren1u6++25NnDhRkhQWFqb09HS9/vrrGjBggH7//Xe5u7trwoQJtlNIHTp00G+//WYLdw0aNFDNmjXtTvf8OaisWLFC586d0zvvvKN69epJku6880716NFDCQkJmjt3rq1teHi4bbBsx44d9d133+nrr7/WU089Vez+3759u+rVq6cBAwbIZDKpffv28vDwUEZGxl9ud6FVq1YpOztbH3zwge30UGFPR6Fjx47p3Xff1bhx4zRy5EhJUufOneXk5KRFixbpsccek4+PjySpoKBAiYmJtjqzs7M1YcIEHThwQC1atNCiRYtUs2ZNvf7663J1dZV0+T0eExNjW9/WrVu1efNmxcfHq0ePHrZ9k5OTo7i4OPXs2dMW8mvXrq1nnnlGJhMd9igbvJOA/1Pc4MaQkBDl5uaqV69eio+PV3Jysjp37qwnn3zybwdDNmvW7G/XGR4ebjdwt2vXrqpSpUqxXe+ltWPHDklSr1697KZHRETI2dnZ7uqTmjVr2p2S8Pf3lyTl5OQUu+y9e/cqPz+/yLLbtWunevXqlerKlgcffNDu8T333KMzZ87o2LFj8vPz08qVK9WuXTudOHFC27Zt0+rVq7V7925dunTpqtexbds23XrrrfLz81NBQYEKCgpkMpl05513Fnnt/xxA/f39iwSfPwoNDdXPP/+shx56SAsXLlRKSop69eqlQYMGXVVt33//vdq0aWM3hqVevXp2PVHff/+9rFarunbtaqu/oKBAXbt2VV5enpKTk21t/ximpP+NjSncp8nJyQoLC7OFFOnya+7s7Gx7vG3bNjk5OSk8PLzI+n7//XcdPnzY1jYwMJCQgjJFjwoqvVOnTsnNzU3e3t5F5rVp00aLFy/W8uXLlZiYqDfeeEO1atXSiBEj/vaDx9fX92/X/eceHJPJJG9vb2VlZZVoG/5K4ViRwkGchVxcXOTj46Pz58/bprm7u9u1KQxjFovlL5f95+0onPbHZV+t2rVr2z0ufB0LX5MPP/xQc+bM0cmTJ+Xt7a2goKASn2I4d+6cUlNTddtttxU7/4/B7M+viclk+sv7pvTo0UMWi0Vvv/225s+fr4SEBNWrV09PPfWUIiIi/ra2s2fPFltXrVq19Pvvv9vql3TF5Z06deov65f+t08zMzNVs2ZNuzaF741C586dk9VqVdu2bYtdX3p6uq1nr7j3AnAtCCqo1Mxms3bs2KG2bdva/QX5R2FhYbZu7u+//14rV67USy+9pNatWys4OPia1v/nQGI2m5WRkWH7cHZyci
pyn5e/+mu+OIVjS37//XfVr1/fNv3SpUvKyMiw+0AqqcJlnz592m5cR+H6br755hIvs/BDuNCZM2ckXQ4su3bt0oQ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Number of patients per gender code. value_counts() orders rows by frequency,\n",
"# not by code, so the tick labels are derived from the index instead of being hard-coded.\n",
"gender_counts = dane['Gender'].value_counts()\n",
"gender_names = {1: 'Man', 2: 'Woman'}\n",
"tick_labels = [gender_names.get(code, str(code)) for code in gender_counts.index]\n",
"\n",
"ax = gender_counts.plot(kind='bar')\n",
"\n",
"# Write each count just above its bar\n",
"for i, value in enumerate(gender_counts):\n",
"    ax.text(i, value + 0.1, str(value), ha='center', va='bottom')\n",
"\n",
"# Replace the numeric gender codes on the x axis with readable names\n",
"ax.set_xticks(range(len(gender_counts)))\n",
"ax.set_xticklabels(tick_labels)\n",
"ax.set_title(\"Distribution of patients' gender\")\n",
"\n",
"# Add the legend\n",
"plt.legend()\n",
"\n",
"# Show the chart\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "98973a0f",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj8AAAHJCAYAAABqj1iuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABfvUlEQVR4nO3deXxMV+PH8c9MdpIQse9KJdQuIUEagmptpbWVUEtR60Nb+74vtW+NfW1LqSqtPoraqmippbYqSmsJRYg1kZn5/eGXeTpNaESSScz3/Xp5yZx777nnTm4m35xz7r0Gi8ViQURERMRBGO3dABEREZG0pPAjIiIiDkXhR0RERByKwo+IiIg4FIUfERERcSgKPyIiIuJQFH5ERETEoSj8iIiIiENR+BFJ59LDfUjTQxtERFKKwo/IM2jdujV+fn7Wf/7+/pQvX5433niD5cuXYzKZbNYPCwujf//+Sa5/69at9OvX71/X69+/P2FhYcnez+PExsYybtw4NmzY8Nh9pQeTJk2icuXKlCtXjnXr1iVYvm/fPvz8/Ni3b1+at+3f3i97ti0ttG7dmtatW9u7GSI2nO3dAJGMrmTJkgwbNgwAk8nErVu32LFjB2PHjuXAgQNMnToVg8EAwKxZs/D09Exy3UuWLEnSel27dqVNmzZP3fZ/c/XqVZYsWcK4ceNSfV/JderUKebPn0+zZs14/fXXeeGFF+zdJBFJ5xR+RJ6Rp6cn5cqVsykLCwujSJEijBs3jrCwMBo2bAg8CkqpoWDBgqlSr733lRQ3b94EoF69egQEBNi3MSKSIWjYSySVtG7dmpw5c7Jy5Upr2T+HozZu3EjDhg0pU6YMQUFBfPDBB1y9etW6/Y8//siPP/5oHRaJHyJZuXIlNWrUoEqVKnz//feJDq08fPiQ0aNHExgYSGBgIP369ePGjRvW5Yltc+HCBfz8/Fi7di0XLlygZs2aAAwYMMC67j+3M5lMfPzxxzRo0IAyZcpQvXp1Jk2aRExMjM2+2rZty+eff06dOnUoVaoUDRs2ZMeOHf/6Pm7cuJE33niD8uXLU7VqVYYOHcqtW7cAmDlzpnVI5e23336q4bhTp07RuXNnKlSoQIUKFejWrRt//vknADExMQQEBDB27FibbcxmM9WqVWPEiBHWstWrV1OvXj1KlSpF9erVmTlzJnFxcUluR7zTp0/TsmVLSpcuTe3atVm+fLl1Wc+ePQkNDcVsNttsM3ToUGrWrPnYOVl37txh6NChBAcHU758eXr37s2SJUvw8/OzWW/Lli288cYblC5dmqpVqzJ69Gju3btnXT5z5kxq167N9u3badCgAaVKlaJOnTp88cUXNvVcunSJ7t27U7FiRapWrcrixYsTbde/vWf9+/fn7bffZtiwYQQEBNC4ceNkvacij6PwI5JKnJycCA4O5siRI4l+cB84cIAPPviAV155hfnz5zNgwAD27t3L+++/D8CwYcMoWbIkJUuWZNWqVbz00kvWbadOnUq/fv3o169fgl6neN988w1Hjx5l/Pjx9O3bl+3bt9O1a9cktz9nzpzMmjULgC5duli//qehQ4cyduxYwsLC+Oijj2jVqhUrVqyga9euNr+Ujx49ysKFC+nZsyezZ8/G2dmZnj17WoNMYubMmUPv3r0pW7YsM2bMoFu3bmzatInWrVvz4MEDmjZtytChQ63teFwb/+n333+nRYsWXL9+nfHjxzNmzBj+/PNP3nrrLa5fv46bmxt16tThm2++sQkc+/bt46+//uL1118HYO7cuQwZMoTg4GAiIiJo1aoV8+fPt7bpaYwbN46yZcsyZ84cQkJCGD16NJ999hkATZo0ITIy0mZeUGxsLN988w2NGze2Dqv+U7du3fjmm2/o0aMHU6dO5e7du0yePNlmnQ0bNtCtWzdeeOEFZs+eTffu3Vm/fn2C799ff/3FyJEjadOmDfPmzSN//vz079+fM2fOAHDv3j3Cw8
M5efIkI0eOZOjQoaxevZqDBw/a7C+p79n+/fs5f/48M2fOpFu3bjg7a6BCUo7OJpFUlD17dh4+fMjNmzfJnj27zbIDBw7g5uZGx44dcXNzAyBr1qz88ssvWCwWihUrZp0f9M+A06JFC1599dUn7tvb25sFCxZY6/Dx8aFbt258//33VKtW7V/b7urqSokSJYBHQ12JDdmdPn2aNWvW0KtXL7p06QJA1apVyZkzJ3379mXnzp2EhoYCcPv2bdauXWsdNsuUKRPh4eHs3buXOnXqJKj71q1bfPTRRzRt2tQ6pwqgePHitGrVirVr19KyZUuKFSsGQLFixZI8rDhr1izc3d1ZsmSJ9f0JDg6mVq1aLFiwgH79+vH666+zZs0a9u/fT6VKlYBHQaFQoUKUK1eO27dv89FHH9G8eXMGDx4MQLVq1ciaNSuDBw+mXbt2vPjii0lqD8Abb7xhndweEhLClStXmD17Nk2aNKFatWrkzp2bdevWERwcDDzqrbl9+zaNGzdOtL49e/awd+9eZs6cySuvvALAyy+/TIMGDTh9+jTw6Cq+SZMmERISwqRJk6zbFi5cmLZt27Jjxw6qV68OwP379xkzZox1/4ULF6ZGjRrs2LGDokWL8sUXX3Dp0iW+/PJLa89SmTJlqF27trXep3nP4uLiGDFiBIUKFUryeyiSVOr5EUkDif1lHhgYyIMHD2jQoAFTp07lwIEDVKtWje7duz/2L/l4/xy2SExoaKjN5OqwsDBcXFz44Ycfnv4AHuPHH38EoEGDBjbl9erVw8nJyaanIlu2bDbzhXLnzg08+qWamEOHDhEbG5ug7oCAAPLly/dMV0ft3buXypUr4+7uTlxcHHFxcXh6ehIQEGB9fwIDA8mXLx9ff/018KinZfPmzdb5WwcPHuT+/fuEhYVZ64iLi7MOve3evfup2lS3bl2b17Vr1yYyMpKzZ89iNBpp3Lgx3377rfX9+uKLL6hcuTL58uV77DG6uLhQq1Yta5nRaOS1116zvj579iyRkZEJjiEwMBBPT88Ex/D3EB7//YsfHtu/fz8FChSwOTfz5Mljs83TvGfu7u7pbn6ZPD/U8yOSiq5cuYK7uztZs2ZNsKx8+fLMmzePJUuWsHDhQiIiIsiRIwcdO3bk7bfffmK9vr6+/7rvf/Y0GY1GsmbNSnR09FMdw5PED1nlyJHDptzZ2RkfHx9u375tLfPw8LBZJz7g/XMeyz/r/udxxJf9ve6ndfPmTTZu3MjGjRsTLMuWLZu1fQ0aNOCzzz5jyJAh7Ny5k+joaOuQV/xE606dOiW6j/i5W0n1z/cw/nsc/z68+eabRERE8O2331KlShV2795tcxXeP0VFRZE1a1aMRtu/cf/+fsYfw4gRI2zmMT3uGP7+PYyvN35o7NatW9b37p/Hde3aNZv9JeU98/X1/dc/AkSSS+FHJJWYTCZ+/PFHKlSogJOTU6LrhISEEBISwv3799m7dy/Lli1j7NixlCtXjrJlyz7T/v8ZckwmE1FRUdZfqgaDIcF9iP4+yTUpsmTJAjyaD5I/f35r+cOHD4mKisLHxyc5Tbep+9q1axQtWtRm2V9//UWBAgWSXbeXlxdVqlShXbt2CZb9fW7J66+/TkREBPv27eOrr76iQoUK1v16e3sDj+4xVLhw4QT1JBbanuSfc5/iA0P896tAgQJUqlSJb775htu3b+Ph4WEdzkpMrly5iIqKwmw22wSg69evW7+OP4a+fftah/b+Lv57kBQ+Pj6cP38+QXl84Pn7/lLqPRNJLg17iaSSlStXcvXqVd56661El0+YMIEmTZpgsVjw8PCgRo0a1jkfly9fBkjwV/vT+OGHH2wmWm/atIm4uDgqV64MQObMmYmKirK5Kuvnn3+2qeNxoS3e3+fC/N3XX3+NyWSiYsWKyW5/2bJlcXV1TVD3/v37uXTpEhUqVEh23ZUqVeL06dOUKFGC0qVLU7p0aUqVKsWSJUvYvHmzdb0XXn
iB0qVL8/XXX7N9+3Zrr098+1xcXLhy5Yq1jtKlS+Pi4sLkyZO5cOHCU7Vp165dNq+//vpr8uTJYzPnpUmTJvzwww+
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count patients for every (gender, level) pair: rows are gender codes, columns are levels.\n",
"# fill_value=0 keeps a missing combination as a zero-height bar instead of NaN.\n",
"grouped_data = dane.groupby(['Gender', 'Level']).size().unstack(fill_value=0)\n",
"\n",
"# One group of bars per level; the two gender bars sit side by side inside each group\n",
"categories = grouped_data.columns\n",
"bar_width = 0.35\n",
"bar_positions_man = np.arange(len(categories))\n",
"bar_positions_woman = [pos + bar_width for pos in bar_positions_man]\n",
"\n",
"# Draw the grouped bar chart (gender is still coded 1 = man, 2 = woman in this cell)\n",
"fig, ax = plt.subplots()\n",
"\n",
"ax.bar(bar_positions_man, grouped_data.loc[1], width=bar_width, label='Man')\n",
"ax.bar(bar_positions_woman, grouped_data.loc[2], width=bar_width, label='Woman')\n",
"\n",
"# Annotate every bar with that gender's share of all patients at the same level\n",
"for i, column in enumerate(categories):\n",
"    total = grouped_data[column].sum()\n",
"    for j, value in enumerate(grouped_data.index):\n",
"        height = grouped_data.loc[value, column]\n",
"        percent = height / total\n",
"        ax.text(i + j * bar_width, height + 0.2, f'{percent:.0%}', ha='center', va='bottom')\n",
"\n",
"# Axis labels and title: the x axis carries the levels, the legend carries the genders\n",
"ax.set_xlabel('Level')\n",
"ax.set_ylabel('Count')\n",
"ax.set_title('Distribution of level by gender')\n",
"\n",
"# Add the legend\n",
"ax.legend(title='Gender')\n",
"\n",
"# Centre each level name under its pair of bars\n",
"ax.set_xticks([pos + bar_width / 2 for pos in bar_positions_man])\n",
"ax.set_xticklabels(categories)\n",
"\n",
"# Show the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "8d81604c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Smoking\n",
" 1 181\n",
" 2 222\n",
" 3 172\n",
" 4 59\n",
" 5 10\n",
" 6 60\n",
" 7 207\n",
" 8 89\n",
" dtype: int64]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of patients at each smoking level, kept as a one-element list\n",
"dane3 = [dane.groupby(by='Smoking').size()]\n",
"dane3"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "d85261ce",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABMoAAAImCAYAAACipWb6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3yN5//H8Ve2IFPtEatmEjOIUcSoWFWkFYoSe9WeRfVr16w9QmpUKMq3NdqKr6Ko2qtFrRapUYnYSc45vz/yy6nTBAkhDu/n45FHk+u+7uv+3Hc+oj65ruu2MZlMJkRERERERERERF5ztukdgIiIiIiIiIiIyMtAhTIRERERERERERFUKBMREREREREREQFUKBMREREREREREQFUKBMREREREREREQFUKBMREREREREREQFUKBMREREREREREQFUKBMREREREREREQFUKBMREZGXlMlkSu8QXooYREREROTFUaFMREREUq1169YULVrU/FGsWDHKlClD06ZNWbp0KQaDwaJ/QEAAgwcPTvH4ERERDBo06In9Bg8eTEBAwFNf51FiY2MZN24c33zzzSOv9TKYNGkSFStWpHTp0qxbty69w0midevWtG7d+pHH165dS9GiRbl48eJzj2XGjBkULVr0uV8nvRQtWpQZM2Y812scOXKEt99+m9jYWAD2799PgwYNKF++PAMHDuTu3bsW/ZcsWUL79u2TjPP7778TEBBATEzMc41XRETkaahQJiIiIk+lRIkSrFy5kpUrV7J8+XImT56Mj48PY8eOpV+/fhazsWbOnEm3bt1SPHZYWBiRkZFP7NetWzdmzpz5VPE/ztWrVwkLCyM+Pv65X+tpnTp1igULFlC3bl0WLlzIW2+9ld4hpVqNGjVYuXIl2bJlS+9Q5AkePHjAoEGD6NevH46OjsTGxtKnTx/KlCnD5MmTOXz4MHPmzDH3v337NnPnzqVv375JxipcuDABAQGMGTPmRd6CiIhIitindwAiIiJinTJnzkzp0qUt2gICAihQoADjxo0jICCAxo0bAwlFtechX758z2Xc9L5WSkRHRwOYZ/RYI09PTzw9PdM7DEmBL7/8EhsbG+rWrQvA2bNnuXLlCv369cPDw4Nz587xzTff0K9fPwAWLFhAxYoV8fb2Tna8Tp06UaNGDdq0aUPJkiVf2H2IiIg8iWaUiYiISJpq3bo12bJlIzw83Nz27yWRGzdupHHjxvj6+lKpUiX69+/P1atXzefv3buXvXv3UrRoUX7++Wd+/vlnihYtSnh4ODVr1qRy5crs3Lkz2eWQcXFxjB49Gj8/P/z8/Bg0aBA3btwwH0/unIsXL1K0aFHWrl3LxYsXqVWrFgBDhgwx9/33eQaDgeXLl9OoUSN8fX2pUaMGkyZN4sGDBxbX+vDDD1mzZg1vv/023t7eNG7cmB9//PGJz3Hjxo00bdqUMmXKUKVKFUaMGMHNmzeBhGWEiUsa27Zt+9gloUuXLqVevXr4+PhQrVo1PvnkE27fvm0+XrRoUVasWMHgwYMpV64cFSpUYPTo0dy/f58JEyZQqVIlKlasyLBhwyzu7cGDB8yaNcs8dt26dZk/fz5Go/GRsezYsQNvb2+GDBmCyWRKsvQypc/r4MGDtGrVitKlS1OjRg2++OILPvzwwxQtu92yZQtvv/02Pj4+BAUFsXv3bgDi4+OpWrWqudDzsMDAQIYMGfLIMc+cOUPHjh0pW7YslStXZurUqQwZMsRi2anRaGT+/PnUqVMHb29v3n77bZYuXWoxTuvWrRk2bBjz58+nRo0a+Pj40KJFCw4fPmzRb+/evbz//vuUKlWKt99+m127diWJ6cGDB0ycOJHq1avj7e1No0aN2Lhxo0WfgIAAxo4dS9u2bSlbtiwjRoxI9v5iY2NZvHgxjRo1SnIsQ4YMADg4OJi/91euXGH58uX07t37kc8sW7ZsVKpUifnz5z+yj4iISHpQoUxERE
TSlJ2dHf7+/hw5csRi6WKi/fv3079/f+rWrcuCBQsYMmQIe/bsMRcoRo4cSYkSJcxLOx+ebTJ16lQGDRrEoEGDksxmS7Rp0yaOHTvG+PHjGThwINu2bUvVss9s2bKZl1h27dr1kcstR4wYwdixYwkICGDOnDm0atWKZcuW0a1bN4tlp8eOHSM0NJRevXoxa9Ys7O3t6dWrl7nolZzZs2fTp08fSpUqxeeff0737t357rvvaN26Nffv3ycoKMhc1BgxYsQjY9ywYQMTJkygVatWhIaG0r17d9avX8/o0aMt+k2aNAlHR0dmzpzJO++8w9KlS2nSpAmRkZF89tlntGjRgtWrV5sLOyaTiS5durBw4UKaN2/O3LlzqVevHtOmTWPkyJHJxvLLL7/Qo0cPGjRowJgxY7CxsUm235Oe15kzZ/jwww8BmDJlCj179mT+/Pns37//kc/zYUOHDqVNmzbMmDGDTJky0bFjR37//Xfs7e1p0qQJW7ZssSgkHj58mLNnz9K0adNkx7tx4wYffPABkZGRjBs3jo8//pjNmzfz7bffWvT75JNP+Pzzz2ncuLH5eY0dO5ZZs2ZZ9Pvuu++IiIjg448/ZsqUKVy/fp1evXqZ9/07fvw47du3J3PmzEyfPp22bdsmWd5oMpno3r074eHhtGvXjjlz5lCmTBn69OmTZC+75cuXm/c3e+edd5K9x59//pkrV65Qr149c1v+/Pnx8PBgzZo13Lhxg82bN1OuXDkgoZDbqFEjvLy8HvOdSChARkREcOfOncf2ExEReZG09FJERETS3BtvvEFcXBzR0dG88cYbFsf279+Pk5MTHTt2xMnJCQB3d3eOHj2KyWSicOHCZM6cGSBJMaxFixYW/1hPjqurKwsXLjSP4eHhQffu3dm5cydVq1Z9YuyOjo4UL14cSFhumdyy0d9//53Vq1fTu3dvunbtCkCVKlXIli0bAwcOZPv27VSvXh2AW7dusXbtWvPSzYwZM/LBBx+wZ88e3n777SRj37x5kzlz5hAUFGRRdCpSpAitWrVi7dq1tGzZksKFCwMJ+z09amnrzz//TO7cuWnVqhW2trZUqFCBjBkzEhUVZdGvUKFCfPrppwD4+fmxevVq4uLimDRpEvb29lSrVo2tW7dy4MABALZv386uXbv47LPPzMtrq1SpQoYMGczFm8T4IGET+M6dO1O3bl3GjRuHre2jf1f7pOc1b948MmfOzMKFC3F2dgagYMGCtGjR4pFjPmzkyJE0aNAAAH9/f2rVqsWcOXOYPHkyzZo1Y8GCBXz33Xc0a9YMgK+//pp8+fI9cnnr0qVLuXPnDuvWrSN79uwA5pleic6dO8eqVavo27cvnTp1AqBq1arY2Ngwb948WrZsiYeHB5Awsy00NNScv3fu3GHQoEH8+uuveHt7M2/ePDw9PZkzZw6Ojo5Awp+fPn36mK+3a9cuduzYwdSpU6lfvz4A1apV4969e0yaNImGDRtib5/wz4Bs2bIxePDgx35P9uzZg6urKwUKFDC3ZciQgfHjxzNkyBBGjx6Nv78/PXr04Pfff2fz5s1s2rSJbdu28fnnn2NnZ0efPn2oXLmyxbg+Pj7ExcWxb98+858XERGR9KYZZSIiIvLcJDdryM/Pj/v379OoUSOmTp3K/v37qVq1Kj169HjkLKNEKXlrYfXq1c1FBkhYXubg4JDs8rSntXfvXoAkS9EaNGiAnZ0dP//8s7nN09PTYn+zHDlyAHDv3r1kxz506BCxsbFJxi5fvjy5c+e2GPtJKlWqxPnz52natCmzZ8/mxIkTNGrUiLZt21r0K1OmjPlze3t7PDw88Pb2NhdTIKEYc+vWLfP929nZmYswiRKLZg/HePnyZTp27IjJZGLkyJGPLcjAk5/Xnj17qF69urlIlhh/7ty5n/g87OzszHtsATg5OfHWW2+Zc6NAgQKUK1eO9evXAwlLDjdu3EiTJk0emZt79uyhTJky5iIZQO7cuS2e6Z
49ezCZTAQEBBAfH2/+CAgI4MGDBxaz4R4uFAPmcRPvf//+/VSrVs1cJAOoW7cudnZ25q93796NjY0N1atXT3K9a9e
"text/plain": [
"<Figure size 1400x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Recode gender from 1/2 to readable names.\n",
"# NOTE(review): this overwrites dane['Gender'] in place, so any cell that looks genders up by their numeric code has to run before this one.\n",
"dane['Gender'] = dane['Gender'].replace({1: 'Man', 2: 'Woman'})\n",
"smoking_counts = dane.groupby(['Smoking', 'Gender']).size()\n",
"\n",
"# Turn counts into percentages of each gender's total\n",
"smoking_percentages = smoking_counts / smoking_counts.groupby('Gender').sum() * 100\n",
"\n",
"# Order the bars by the 'Smoking' level only\n",
"smoking_percentages_sorted = smoking_percentages.sort_index(level='Smoking', sort_remaining=False)\n",
"\n",
"fig, ax = plt.subplots(figsize=(14, 6))\n",
"\n",
"# Horizontal bar chart with one bar per (smoking level, gender) pair\n",
"smoking_percentages_sorted.plot(kind='barh', ax=ax)\n",
"\n",
"# Print the percentage next to the end of each bar\n",
"for i, value in enumerate(smoking_percentages_sorted):\n",
"    ax.text(value + 0.1, i, f'{value:.2f}%', ha='left', va='center')\n",
"\n",
"# No legend here: the single bar series has no label, so plt.legend() only produced a warning\n",
"\n",
"# Axis labels and title\n",
"ax.set_xlabel('Percentage')\n",
"ax.set_ylabel('Smoking, Gender')\n",
"ax.set_title('Distribution of smoking by gender (%)')\n",
"\n",
"# Show the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "f5daf17c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Passive Smoker\n",
" 1 60\n",
" 2 284\n",
" 3 140\n",
" 4 161\n",
" 5 30\n",
" 6 30\n",
" 7 187\n",
" 8 108\n",
" dtype: int64]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of patients at each passive-smoking level, kept as a one-element list\n",
"dane3 = [dane.groupby(by='Passive Smoker').size()]\n",
"dane3"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "86122d04",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABRQAAAImCAYAAAAi8wbAAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3yN5//H8Vd2jEwl9q6ZxIwIUSRoKaVGK0ZVUbu1R9Vq7RIrYqZSo2aVquhAW5S2NqFGg7Y2lWUmOef8/sgv5+s0QRJRxPv5eHg057qv+7qv+z6fo/E517AymUwmRERERERERERERNLB+kl3QERERERERERERJ4dSiiKiIiIiIiIiIhIuimhKCIiIiIiIiIiIummhKKIiIiIiIiIiIikmxKKIiIiIiIiIiIikm5KKIqIiIiIiIiIiEi6KaEoIiIiIiIiIiIi6aaEooiIiIiIiIiIiKSbEooiIiLyzDGZTE+6C09FH0REREREngQlFEVERCRLdezYkbJly5r/lCtXjipVqtCyZUuWLl2KwWCwqB8QEMCwYcPS3f7WrVsZOnToQ+sNGzaMgICATF/nfhISEpg4cSIbN26877WeBlOnTsXX15fKlSuzfv36J92dB/r1118pW7Ysv/7665PuymPVsWNHOnbs+KS7YfY0xm1WOXfuHGXLlmXdunWP9TrffvstQUFB5tfff/89gYGB+Pr6MmHChFR/302cOJGRI0emaufnn3+mRYsWJCYmPtb+ioiIZBXbJ90BERERyX4qVKjA6NGjATAYDMTGxvLTTz8xYcIE9u3bx/Tp07GysgIgJCSE3Llzp7vt8PDwdNXr1asXb731Vob7/jBXrlwhPDyciRMnPvZrZdbJkydZuHAhb7zxBs2bN6dkyZJPuksPVLFiRVatWkXp0qWfdFdE0u369euMHTuWBQsWmF8PHjyYTp064eXlxciRIylZsiRt27YFkpOc69at4+uvv07VVu3atVm6dClz587lvffe+0/vQ0REJDOUUBQREZEslzt3bipXrmxRFhAQQIkSJZg4cSIBAQG89tprQHLy8XEoWrToY2n3SV8rPWJiYgB49dVXqV69+pPtTDqkFS8iT7vQ0FAqVqyIp6cnAPv378fGxoZ+/fphZWXFL7/8wq5du8wJxenTp/Pmm2/i4eGRZnu9evWiffv2tG3blnz58v1n9yEiIpIZmvIsIiIi/5mOHTuSL18+Vq5caS7791TkiIgIXnvtNby9valZsyaDBg3iypUr5vN/++03fvvtN/MU2ZTpsitXrqR+/frUqlWLnTt3pjmdMzExkXHjxuHj44OPjw9Dhw7l+vXr5uNpnXPv1Mlz584RGBgIwPDhw811/32ewWBg+fLlNGvWDG9vb+rVq8fUqVO5e/euxbXefvttvvjiC15++WU8PT157bXX+Omnnx76HCMiImjZsiVVqlShdu3ajBo1itjYWABmz55tnlbbqVOn+05pTXluO3fupH379nh7e9OwYUOWLVtmUS9lFFb9+vXx9PSkRo0a9O7dm3Pnzpnr/P333/Ts2RNfX18qVarEm2++aXEfd+/eZezYsbz00kt4enryyiuv8Omnn6bqy6+//sr+/fspW7YsW7ZssehHVFQUZcuWZfPmzeY2p0yZQt26dfH09KRZs2ZEREQ89NktXbqUV155BS8vL+rUqcOYMWO4ceOG+XjZsmVZsWIFw4YNo1q1atSoUYNx48Zx584dJk+eTM2aNfH19WXEiBEW7+fdu3eZM2eOue1GjRqxYMECjEbjffuyY8cOPD09GT58uHlNzgsXLjBgwABq1KhBpUqV6NSpE8eOHTOfkxKPixcvpnHjxtSoUYN169Y99Bk/yKpVq6hXrx7e3t4W14uJicHLy4vg4GCL+nfv3sXHx4eQkJD7tnngwAHat29P5cqVqVevHp999hlvv/22xWc9Pe9hQEAAs2bNYvLkydSqVQtvb2+6dOnCmTNnLO
p999135r83Xn/9dY4fP56qTzExMYwaNYpatWrh5eXFG2+8we7duy3qlC1blpCQEFq1akW1atUIDQ1N8/6uX7/O2rVradasmbnMysoKe3t78+hrOzs78/t/7Ngxdu7cybvvvnvfZ+bt7U3BggXTPQpbRETkSVJCUURERP4zNjY2+Pn5cfjwYZKSklId37dvH4MGDaJRo0YsXLiQ4cOH88svvzBw4EAARo8eTYUKFahQoQKrVq2iYsWK5nOnT5/O0KFDGTp06H1Hu23evJnIyEgmTZrEkCFD+PHHH+nVq1e6+58vXz5zEqVnz573TaiMGjWKCRMmEBAQwNy5c2nfvj3Lli2jV69eFpu5REZGEhYWxnvvvcecOXOwtbXlvffeMycH0xIaGkr//v2pVKkSs2bNonfv3nz77bd07NiRO3fu0KZNG0aNGmXux4OSPgD9+/enQoUKzJkzh9q1a/Pxxx+zdOlSIHnjme7du/Pzzz8zcOBAwsLC6NWrF7t27TJfw2g00r17d27dusWUKVMIDQ3F1dWVXr168eeffwIwfvx4fvrpJ4YOHUpYWBiBgYFMnjw5zfXtqlatSrFixVIlljZu3IiTkxMBAQGYTCZ69+7NypUr6dy5M3PnzqVKlSr079//getFbtq0icmTJ9O+fXvCwsLo3bs3GzZsYNy4cRb1pk6dir29PSEhITRv3pylS5fSokULLl68yCeffELbtm1Zu3atxXPq0aMHixYtonXr1sybN49XXnmFGTNmmKf+/9uePXvo06cPr776KuPHj8fKyorr16/Ttm1bjh49ysiRI5k2bRpGo5H27dsTFRVlcf706dPp0qUL48aNo2bNmhl6xve6dOkSs2fPpl+/fgQHBxMbG8tbb73F9evXcXV1pUGDBmzcuNEibrdu3Up8fDwtWrRIs82oqCjefvttAIKDg+nbty8LFixg37595joZeQ+XLFnC6dOnmThxIuPGjSMyMtIiMblt2zbee+89XnzxRUJCQmjcuDGDBw+2aOPu3bt06tSJrVu30r9/f0JCQsifPz9du3ZNlVScO3cuL7/8MsHBweYvEP7tu+++IykpyeK4p6cn8fHxbNmyhcuXL/Pjjz9SrVo1AD755BO6deuGs7Nz2m/E/3vllVf46quvHlhHRETkaaApzyIiIvKfeuGFF0hMTCQmJoYXXnjB4ti+fftwcHCgW7duODg4AODq6sqRI0cwmUyULl3avN7iv5OGbdu25ZVXXnngtZ2dnVm0aJG5DTc3N3r37s3OnTvx9/d/aN/t7e0pX748kDzNOa3p2n/88Qdr166lX79+9OzZE0heHy1fvnwMGTKE7du3U7duXQDi4+NZt26decp0zpw56dChA7/88gsvv/xyqrZjY2OZO3cubdq0sUhUlSlThvbt27Nu3TratWtnXouwdOnSD51S3qBBA0aMGAFAnTp1uHLlijkJevXqVXLkyMHQoUPNU6d9fX05d+6ceZTpP//8Q1RUFD169DDfl7e3NyEhIeYRfL/99hu1atXi1VdfNbeRM2dO3Nzc0uzTa6+9RlhYGLdv3yZHjhxAcjLwlVdewcHBgZ9//pkdO3Ywffp0mjRpYu777du3mTp1Kk2bNsXWNvWvub/++iuFChWiffv2WFtbU6NGDXLmzEl0dLRFvVKlSvHRRx8B4OPjw9q1a0lMTGTq1KnY2tpSp04dtm3bxv79+wHYvn07u3bt4pNPPjFP5a9duzaOjo7MnDmTTp06WawPefjwYbp3706jRo2YOHEi1tbJ3/F/9tlnxMTEsGLFCgoVKgTASy+9RJMmTZg5cyazZs0yt9GoUSNat25tfp3RZ5zCYDAQEhJi/jxVqlSJBg0aEB4ezoABA2jVqhURERH8+uuv1KxZE4Avv/wSX19fChcunGab8+fPJ3fu3CxatMj8/t27liDArl270v0eOjs7Exoaio2NDQB//fUXs2fPJjo6Gjc3N+bMmUPFihWZNm2a+ZkB5tcAGz
Zs4Pjx46xevZpKlSqZ63Xs2JGpU6fyxRdfmOt6e3s/cCQhwC+//EKpUqXIlSuXuczDw4PRo0czZMgQ7ty5w8svv0z
"text/plain": [
"<Figure size 1500x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"# Recode gender from 1/2 to readable names.\n",
"# NOTE(review): this overwrites dane['Gender'] in place, so any cell that looks genders up by their numeric code has to run before this one.\n",
"dane['Gender'] = dane['Gender'].replace({1: 'Man', 2: 'Woman'})\n",
"smoking_counts = dane.groupby(['Passive Smoker', 'Gender']).size()\n",
"\n",
"# Turn counts into percentages of each gender's total\n",
"smoking_percentages = smoking_counts / smoking_counts.groupby('Gender').sum() * 100\n",
"\n",
"# Order the bars by the 'Passive Smoker' level only\n",
"smoking_percentages_sorted = smoking_percentages.sort_index(level='Passive Smoker', sort_remaining=False)\n",
"\n",
"fig, ax = plt.subplots(figsize=(15, 6))\n",
"\n",
"# Horizontal bar chart with one bar per (passive-smoking level, gender) pair\n",
"smoking_percentages_sorted.plot(kind='barh', ax=ax)\n",
"\n",
"# Print the percentage next to the end of each bar\n",
"for i, value in enumerate(smoking_percentages_sorted):\n",
"    ax.text(value + 0.1, i, f'{value:.2f}%', ha='left', va='center')\n",
"\n",
"# No legend here: the single bar series has no label, so plt.legend() only produced a warning\n",
"\n",
"# Axis labels and title\n",
"ax.set_xlabel('Percentage')\n",
"ax.set_ylabel('Passive Smoker, Gender')\n",
"ax.set_title('Distribution of passive smokers by gender (%)')\n",
"\n",
"# Show the chart\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "28c8acde",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABRMAAAImCAYAAADALx25AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3zO1///8UcmEZlK7F0JTWJGxChCraK+VsX8WLVVVe1Rbc0SFBE7qKJUVc0WnxZt1R6hRmOrLRGJkeS6rt8f+eX69GqCLKI877dbbpVzzvu8z/udl6ReOcPKZDKZEBEREREREREREXkK66wegIiIiIiIiIiIiPw7KJkoIiIiIiIiIiIiqaJkooiIiIiIiIiIiKSKkokiIiIiIiIiIiKSKkomioiIiIiIiIiISKoomSgiIiIiIiIiIiKpomSiiIiIiIiIiIiIpIqSiSIiIiIiIiIiIpIqSiaKiIjIK89kMmX1EF6IMbys9G5FREREMo+SiSIiIvJC69ChA56enuYPLy8vypcvT/PmzVm2bBkGg8GifWBgIEOHDk11/9u3b2fIkCFPbTd06FACAwPTfZ/HiYuLY8KECXz//fePvdeLYMqUKfj7+1OuXDnWrVuX1cNJlef5bteuXYunpyeXL19+Ju3Te82/SWb9nXqSy5cvU6tWLe7cuQNAREQErVq1okKFCvTs2ZNbt25ZtN++fTsNGzZM9n3mzp071KxZk0uXLj3T8YqIiLyIlEwUERGRF16ZMmVYtWoVq1atYvny5UydOhUfHx/Gjx/Phx9+aDHzbNasWfTu3TvVfYeFhXH16tWntuvduzezZs1K1/if5MaNG4SFhZGQkPDM75Vep0+fZv78+dSrV48FCxbw5ptvZvWQUuV5vttatWqxatUq8uTJk+l9S+YwmUwMHz6cTp064e7uDsDgwYPJlSsXM2fOJDIykvHjx5vbGwwGgoODGThwIDY2NhZ9ubu785///Ifhw4dr5quIiLxybLN6ACIiIiJPkzNnTsqVK2dRFhgYSLFixZgwYQKBgYE0bdoUSEw8PguFCxd+Jv1m9b1SIyoqCoC3336bSpUqZe1gMuhZvVt3d3dzgkpeTD/++CMnT55k/vz5ANy7d4/w8HC++eYbvL29uX//PqNGjTK3/+abb8iZMydvvfVWiv21bduW0NBQtm3b9tg2IiIiLyPNTBQREZF/rQ4dOpAnTx5WrlxpLvvnUslNmzbRtGlTfH19qVKlCoMGDeLGjRvm6/fu3cvevXvx9PTk999/5/fff8fT05OVK1dSu3Ztqlatyu7du1NcHhsfH89nn32Gn58ffn5+DBkyxLx8ElJeUnv58mU8PT1Zu3Ytly9fpk6dOgAMGzbM3Paf1xkMBpYvX06TJk3w9fWlVq1aTJkyhUePHlnc6z//+Q/ffPMN9evXx9vbm6ZNm/Lzzz8/9T1u2rSJ5s2bU758eapVq8bo0aO5e/cuADNnzqRDhw4AdOrU6YlLhCMiIujevTsVKlSgatWqTJs2jWHDhpmvBzAajcybN4+33noLb29v6tevz7Jlyyz66dChAyNGjGDevHnUqlULHx8f2rRpw5EjRyzanT59mh49elChQgUqVKhAnz59zMtOU/tuTSYTy5cv5+2338bX15e33nqL+fPnP3G2maenJ7NmzaJFixZUrFiRkJCQZEuQ79y5w6BBg6hWrRo+Pj688847T1weHh0dzTvvvENgYOBTlzEfPHiQZs2a4ePjQ5MmTdi0aZO5rkWLFrRp0ybZNV27drX4OvzTjRs3+OCDD6hcuTJ+fn6MHj2aadOmJft6r169mrfffhtvb29q1arFzJkzLWZ+pjYOT548SefOnSlfvjy1a9dm/fr1ycaU2lgZNGgQ/fv3p0KFCrz33nuPfca5c+dSr149smXLBoCVlRUA2bNnB8DOzg6j0QjAgwcPmDlzJh9++OFj+8uWLRv16tVj7ty5j20jIiLyMlIyUU
RERP61bGxsCAgI4OjRoxYJjSQHDhxg0KBB1KtXj/nz5zNs2DD27NljThCMGTOGMmXKmJdRv/HGG+Zrp02bxpAhQxgyZEiyWZFJNm/eTHh4OBMnTmTw4MH89NNPaVpinSdPHvOS2169ej12+e3o0aMZP348gYGBzJkzh3bt2vHll1/Su3dvi6RXeHg4CxcupH///syePRtbW1v69+9vTgymJCQkhA8++ICyZcvyxRdf0KdPH7Zu3UqHDh14+PAhrVq1YvTo0eZxPG6Md+7coX379ly9epUJEyYwcuRItmzZwoYNGyzaffzxx3zxxRc0bdqU0NBQGjRowPjx45k9e7ZFu61bt7J9+3ZGjhxJcHAwt27don///ua9686dO0ebNm24ffs2EydOZNy4cVy6dImgoCBu376d6ncbHBzMuHHjqFmzJnPmzKFVq1ZMmzaNkJCQx74zgDlz5lC/fn2Cg4PNScu/++ijj/jzzz8ZO3Ys8+bNo0yZMgwZMoTff/89WdvY2Fi6d+9OdHQ0S5YsoWDBgk+896hRo2jQoAGzZ8+mZMmSfPDBB+zevRuAli1bcujQIS5cuGBuf/36dX777TdatGiRYn9xcXF06tSJgwcPMnz4cCZMmMDJkydZtGiRRbu5c+cyatQoAgICCA0NpV27dsyfP98cH0meFofXr1+nffv23L17l88//5z333+fKVOmcP36dYt+Uhsrmzdvxs7OjtmzZ9OxY8cUn/Hs2bOEh4fToEEDc1nOnDkpWbIka9euJTo6mvXr11OhQgUgcfuD0qVLU7ly5cd+HQAaNmzIsWPHOHfu3BPbiYiIvEy0zFlERET+1V577TXi4+OJioritddes6g7cOAA2bJlo3v37ubZSK6urhw7dgyTyUTJkiXJmTMnQLKEYZs2bSwSDylxdnZmwYIF5j7c3Nzo06cPu3fvpnr16k8du729PaVLlwYSl9+mtET7zz//ZM2aNQwYMIBevXoBUK1aNfLkycPgwYPZuXMnNWvWBBKXba5du9a8lDdHjhy0b9+ePXv2UL9+/WR9371715xAGzNmjLm8VKlStGvXjrVr19K2bVtKliwJQMmSJR+7jHzZsmXExsaybt06PDw8AChbtqzFfc+dO8fXX3/NwIEDzTPIqlevjpWVFXPnzqVt27a4ubkBkJCQwMKFC83vNjY2liFDhvDHH3/g7e3NrFmzyJ49O2FhYeY2AQEB1K1blwULFjBkyJCnvtvo6GgWL15Mhw4dGDx4sPnd3rlzhwMHDqT4nEl8fX0tZsEdP37con7v3r307t2bunXrAuDv74+rq2uyvfcePXpEr169uHbtGl9++SWFChV64n0B+vTpY773m2++yfnz55k1axbVq1encePGTJw4ke+++47+/fsDsH79erJnz069evVS7G/9+vWcPXvWvNwXoEqVKuaxQ2JszZkzh3fffZeRI0cCiV87V1dXRo4cSefOnXn99dfNbZ8Uh0n7WM6fP59cuXIBUKxYMVq3bm2+X1pixdramk8//ZQcOXI89p3t2bMHSPy6/d2ECRMYMGAACxcuxNvbmxkzZnDnzh0WLVrEsmXLOHr0KOPHj+fhw4d06dLFvJ1CEh8fHwB+++03ihUr9tj7i4iIvEw0M1FEREReCklLFv/Oz8+Phw8f0qRJE6ZNm8aBAweoXr06ffv2TbH933l6ej71njVr1jQnsiBxibWdnR2//vpr2h/gMfbu3QtAkyZNLMrffvttbGxsLGa6ubu7W+wJmDdvXiBxyWZKDh8+TFxcXLK+K1WqRIECBVKcRfc4e/bsoXz58uZEIkCBAgUoX768RRuTyURgYCAJCQnmj8DAQB49emSRwPt7ohcw95v0LHv27MHf35/s2bOb+8mZMyeVKlVK9fs/fPgw8fHxyfa7Gzp0aLJZef9UqlSpJ9b7+/szc+ZM3n//fdauXcudO3cYMmRIsj0nBw8ezO+//06/fv1SlUiExNlwf1e3bl0OHz5MbG
wsTk5O1KtXz2LZ8Lp162jQoMFjk2179uyhUKFC5kQiJM7aq127tvnzQ4cO8eDBgxS/dgC//PKLue3T4vDAgQOUK1f
"text/plain": [
"<Figure size 1500x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Recode gender from 1/2 to readable names.\n",
"# NOTE(review): this overwrites dane['Gender'] in place, so any cell that looks genders up by their numeric code has to run before this one.\n",
"dane['Gender'] = dane['Gender'].replace({1: 'Man', 2: 'Woman'})\n",
"Genetic_risk_counts = dane.groupby(['Genetic Risk', 'Gender']).size()\n",
"\n",
"# Turn counts into percentages of each gender's total\n",
"Genetic_risk_percentages = Genetic_risk_counts / Genetic_risk_counts.groupby('Gender').sum() * 100\n",
"\n",
"# Order the bars by the 'Genetic Risk' level only\n",
"Genetic_risk_percentages_sorted = Genetic_risk_percentages.sort_index(level='Genetic Risk', sort_remaining=False)\n",
"\n",
"fig, ax = plt.subplots(figsize=(15, 6))\n",
"\n",
"# Horizontal bar chart with one bar per (genetic risk, gender) pair\n",
"Genetic_risk_percentages_sorted.plot(kind='barh', ax=ax)\n",
"\n",
"# Print the percentage next to the end of each bar\n",
"for i, value in enumerate(Genetic_risk_percentages_sorted):\n",
"    ax.text(value + 0.1, i, f'{value:.2f}%', ha='left', va='center')\n",
"\n",
"# No legend here: the single bar series has no label, so plt.legend() only produced a warning\n",
"\n",
"# Axis labels and title\n",
"ax.set_xlabel('Percentage')\n",
"ax.set_ylabel('Genetic risk, Gender')\n",
"ax.set_title('Distribution of genetic risk by gender (%)')\n",
"\n",
"# Show the chart\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "818e4ee5",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA44AAAImCAYAAAAcz9SjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC6GElEQVR4nOzdeVyUVf//8dewC4KCCy7gWmmlhAoqLimjVlia0qJkZi7dmktqt1ullGYuJSqKpimKlRVm5p1py6191VvcbjU10sxbyxTJVEABi2Vmfn/wc2oEERQciPfz8eARc65znetzMcfgM+dc5xgsFosFERERERERketwsHcAIiIiIiIiUrYpcRQREREREZFCKXEUERERERGRQilxFBERERERkUIpcRQREREREZFCKXEUERERERGRQilxFBERERERkUIpcRQREREREZFCKXEUEZHbxmKx2DuEMhGD/H2oP4lIRaHEUUREAOjfvz9NmjSxfjVt2pQWLVoQHh7Oe++9h8lksqlvNBqZNGlSkdvfsmULEydOvGG9SZMmYTQab/o615Odnc3MmTPZsGHDda9VFsyZM4c2bdoQGBjI+vXr8x3fs2cPTZo0Yc+ePbc9thv9vOwZmz18/PHHzJ492/p63bp1NGnShDNnzpTodc6cOUOTJk1Yt25dqdQXESkKJ3sHICIiZcc999zDq6++CoDJZOLSpUts27aNGTNmsH//fubNm4fBYAAgJiaGypUrF7ntuLi4ItUbPnw4zzzzTLFjv5HffvuNuLg4Zs6cWerXulk//vgjy5Yt48knn+TRRx+lUaNG9g5JCvH222/TunVr6+vOnTsTHx9PzZo1S/Q6NWvWJD4+nnr16pVouyIixaHEUURErCpXrkxgYKBNmdFopGHDhsycOROj0UjPnj2BvCSzNNzOP47L2h/iaWlpADz88MMEBQXZNxgpNh8fH3x8fEq8XRcXl3z/LkVEbjdNVRURkRvq378/NWvW5KOPPrKWXTuFdNOmTfTs2ZOAgADatm3LuHHj+O2336zn7927l71791qnMl6d1vjRRx8RGhpKu3bt2LFjR4HTIXNycpg+fTrBwcEEBwczceJEUlJSrMcLOuev0/XOnDlDly5dAHjppZesda89z2QysXr1anr06EFAQACdO3dmzpw5ZGVl2Vzr2Wef5ZNPPuHBBx+kWbNm9OzZk23btt3w57hp0ybCw8Np0aIF7du3JzIykkuXLgGwcOFC+vfvD8CAAQOKNYX2xx9/ZOjQobRs2ZKWLVsyYsQITp8+DUBWVhZBQUHMmDHD5hyz2UyHDh2YOnWqtezjjz/m4YcfplmzZnTu3JmFCxeSm5tb5Diu+t///sdTTz1F8+bN6datG++995712AsvvECnTp0wm80250RGRtKlS5frPjOYkZFBZGQkISEhtGjRgrFjxxIXF0eTJk1s6m3evJnw8HCaN29O+/btmT59OleuXLEeX7hwId26dWPr1q306NGDZs2a8eCDD/Lpp5/atJOWlkZkZCTt2rWjefPmPPnkk+zatct63Gg0kpSUxKeffmqdnlrQVNWEhAT69etHixYt6NChg817XpD+/fszbtw4XnjhBVq2bMk//vGPfFNPzWYz0dHRGI1GmjVrhtFoZO7cueTk5BTYpsViYdKkSTRv3pzt27df99oiIoVR4igiIjfk6OhISEgIhw8fLjCR2L9/P+PGjeOBBx5g2bJlvPTSS+zevZt//vOfALz66qvcc8893HPPPcTHx3Pvvfdaz503bx4TJ05k4sSJ1x1V+eKLL0hMTGTWrFlMmDCBrVu3Mnz48CLHX7NmTWJiYgB4/vnnrd9fKzIykhkzZmA0Gnn77bfp168f77//PsOHD7dJaBITE4mNjeWFF15g0aJFODk58cILLxSaECxevJixY8dy3333sWDBAkaMGMFXX31F//79+eOPP3jiiSeIjIy0xnG9GK/1008/0bdvXy5evM
isWbN44403OH36NBEREVy8eBFXV1cefPBBvvjiC5tkbc+ePZw/f55HH30UgKVLlzJlyhRCQkJYsmQJ/fr1Y9myZdaYimPmzJncd999LF68mI4dOzJ9+nTWrFkDwOOPP86vv/5q8xxkdnY2X3zxBb1797ZOhb7WiBEj+OKLLxg1ahTz5s0jMzOTqKgomzobNmxgxIgRNGrUiEWLFjFy5Eg+++yzfO/f+fPnmTZtGs888wzvvPMOfn5+TJo0iRMnTgB5yfaAAQPYsmULY8eOJSYmhlq1ajFkyBBr8hgTE0ONGjXo1KnTdaenbtu2jSFDhlC1alXmzZvH+PHj+eabb3jhhRcK/fl98cUXODs7s2jRogKnUi9btozVq1czYsQIVqxYQUREBMuXL2fJkiUFtjd9+nQ+//xzYmJiuP/++wu9tojI9WiqqoiIFEn16tXJyckhLS2N6tWr2xzbv38/rq6uPPfcc7i6ugJQtWpVvvvuOywWC3fccYf1echrk8O+ffvy0EMPFXptLy8vli9fbm3D29ubESNGsGPHDjp06HDD2F1cXLj77ruBvOmpBU2z/d///sfatWsZM2YMzz//PADt27enZs2aTJgwge3bt9OpUycA0tPTWbdunXWqq7u7O08//TS7d+/mwQcfzNf2pUuXePvtt3niiSesz5AC3HXXXfTr149169bx1FNPcccddwBwxx13FHkqcExMDG5ubsTFxVl/PiEhIXTt2pXly5czceJEHn30UdauXcu+ffusz+Rt2LCB+vXrExgYSHp6Om+//TZ9+vRh8uTJAHTo0IGqVasyefJkBg4cyJ133lmkeADCw8OtCyF17NiRc+fOsWjRIh5//HE6dOhArVq1WL9+PSEhIUDeKGF6ejq9e/cusL1du3axe/duFi5cyAMPPADA/fffT48ePfjf//4H5I2qzZkzh44dOzJnzhzruQ0aNODZZ59l27ZtdO7cGYDff/+dN954w3r9Bg0aEBoayrZt22jcuDH/+te/+OGHH1izZg333Xef9Xr9+/dnzpw5fPLJJ9xzzz24uLjg4+Nz3Q88FixYQNOmTVm0aJG1zM3Njblz53Lu3Dl8fX0LPM/BwYHXX38dd3d3gHyL7ezdu5d7772Xxx57DIDWrVtTqVKlAp85joqKIj4+noULF1r7r4jIzdCIo4iIFEtBI0LBwcH88ccf9OjRg3nz5rF//346dOjAyJEjrzuCdNW1Uw0L0qlTJ5s/io1GI87OzuzcubP4N3Ade/fuBaBHjx425Q8//DCOjo42I2Q+Pj42z0fWqlULyEtICnLw4EGys7PztR0UFETdunVvaRXS3bt306ZNG9zc3MjNzSU3N5fKlSsTFBRk/fkEBwdTt25dNm7cCOSN8P373/+2Pq/67bff8vvvv2M0Gq1t5ObmWqfLJiQkFCum7t2727zu1q0bv/76KydPnsTBwYHevXvz9ddfW39en376KW3atKFu3brXvUdnZ2e6du1qLXNwcCAsLMz6+uTJk/z666/57iE4OJjKlSvnu4e/JntX37+rU1p37dpFjRo1uPfee63tmEwmQkNDSUxMLHRk+ao//viD77//3iZmgAcffJCvvvrqukkjgJ+fnzVpLEibNm3YuXMnTz31FCtXruTEiRM8/fTT9OrVy6be6tWreeedd+jevTuhoaE3jFlEpDAacRQRkSI5d+4cbm5uVK1aNd+xFi1a8M477xAXF0dsbCxLliyhRo0aPPfccwwYMKDQdqtVq3bDa187wung4EDVqlW5fPlyse6hMFeTgRo1atiUOzk54e3tTXp6urWsUqVKNnWuJsfXPrd3bdvX3sfVsr+2XVxpaWls2rSJTZs25Tt2daEWg8FAjx49WLNmDVOmTGH79u1cvnzZOk316qI8//jHPwq8xtVnVYvq2p/h1ff46s/hscceY8mSJXz99de0a9eOhIQEm9Vur5WamkrVqlVxcLD9vPuvP8+r9zB16lSb5zavdw9/fQ
+vtnt1OmtaWhrnz5+3mVL9V+fPn6dKlSrXjRfy7tVisRSpf1+roH7yV0OGDMHDw4NPPvmE2bNnM2vWLO666y5efvl
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"Genetic_risk_counts = dane.groupby(['Genetic Risk', 'Level']).size()\n",
"\n",
"# Sort by genetic risk first, then by level in the order High, Medium, Low.\n",
"# sort_index applies `key` to every listed index level separately, so values that are not\n",
"# level names (the genetic-risk scores) are passed through unchanged; mapping them through\n",
"# the dict directly would turn them all into NaN and drop that sort level.\n",
"level_rank = {'High': 1, 'Medium': 2, 'Low': 3}\n",
"Genetic_risk_counts_sorted = Genetic_risk_counts.sort_index(\n",
"    level=['Genetic Risk', 'Level'],\n",
"    key=lambda idx: idx.map(lambda v: level_rank.get(v, v)),\n",
")\n",
"\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"\n",
"# Horizontal bar chart with one bar per (genetic risk, level) pair\n",
"Genetic_risk_counts_sorted.plot(kind='barh', ax=ax)\n",
"\n",
"# Print the count next to the end of each bar\n",
"for i, value in enumerate(Genetic_risk_counts_sorted):\n",
"    ax.text(value + 0.1, i, str(value), ha='left', va='center')\n",
"\n",
"# Axis labels and title\n",
"ax.set_xlabel('Count')\n",
"ax.set_ylabel('Genetic Risk, Level')\n",
"ax.set_title('Distribution of level by genetic risk')\n",
"\n",
"# Show the chart\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "f10041ca",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Smoking</th>\n",
" <th>Alcohol use</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>989</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>992</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>993</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>994</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" <tr>\n",
" <th>995</th>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>High</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>256 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Smoking Alcohol use Level\n",
"index \n",
"4 8 8 High\n",
"20 8 8 High\n",
"22 8 8 High\n",
"46 8 8 High\n",
"68 8 8 High\n",
"... ... ... ...\n",
"989 7 7 High\n",
"992 7 7 High\n",
"993 7 7 High\n",
"994 7 7 High\n",
"995 7 7 High\n",
"\n",
"[256 rows x 3 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Patients scoring at least 7 on both the smoking and the alcohol-use scale,\n",
"# listed from the highest scores down\n",
"x = dane.loc[dane['Smoking'].ge(7) & dane['Alcohol use'].ge(7)]\n",
"x_sorted = x.loc[:, ['Smoking', 'Alcohol use', 'Level']].sort_values(\n",
"    by=['Smoking', 'Alcohol use'], ascending=False\n",
")\n",
"x_sorted"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "15eebd5b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Air Pollution\n",
"8 19\n",
"5 20\n",
"7 30\n",
"4 90\n",
"1 141\n",
"3 173\n",
"2 201\n",
"6 326\n",
"Name: count, dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of patients per air-pollution score, shown from the rarest score to the most common\n",
"dane7 = dane.loc[:, 'Air Pollution'].value_counts()\n",
"dane7.sort_values(ascending=True)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "9bd91fc9",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA44AAAImCAYAAAAcz9SjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAADBm0lEQVR4nOzde1xU1fo/8M9wvyt4QQnES4p6cEQFcbykDFJiaUL1VULzgh4VMS+pWCmlqahHULwgKqOoWVKEnkwss46ZKJoYGGlqmHdE5TqgcpmZ3x/8nBoHcAZmYJTP+/XqpbP22ms/M/OcTs+stdcWKBQKBYiIiIiIiIhqYNTYARAREREREZFhY+FIREREREREtWLhSERERERERLVi4UhERERERES1YuFIREREREREtWLhSERERERERLVi4UhERERERES1YuFIREREREREtWLhSEREDUKhUDR2CAYRA9HTME+JyBCxcCQiIowbNw5ubm7Kf7p27YpevXohMDAQu3fvhkwmU+kvFouxcOFCjcf/4YcfEB4e/tR+CxcuhFgsrvN1alJeXo7IyEgcOHCgxmsZgjVr1sDb2xseHh7Yv3+/2vFTp07Bzc0Np06davDYnvZ5NWZsQFUOjxs3rlGuDai//w0bNsDNzU2rMYqLixEeHo4zZ84o2xr7fRERPWbS2AEQEZFh6N69Oz766CMAgEwmQ1FREX766SesWLEC6enpWLt2LQQCAQBg48aNsLGx0XjshIQEjfqFhobinXfe0Tr2p7l79y4SEhIQGRmp92vV1aVLl7Bt2zb83//9H15//XV07NixsUN6pjzO3WfZhQsXsH//fgQGBirbnof3RUTPBxaOREQEALCxsYGHh4dKm1gsRocOHRAZGQmxWIyRI0cCqCoy9aFdu3Z6Gbexr6WJwsJCAMCrr74KT0/Pxg3mGfTiiy82dgh68by+LyJ69nCpKhER1WrcuHFo3bo19u7dq2x7cglpSkoKRo4cCaFQiH79+mHevHm4e/eu8vzTp0/j9OnTyqV8j5f17d27Fz4+Pujfvz+OHz9e7XLIiooKLFu2DF5eXvDy8kJ4eDjy8/OVx6s75+bNm3Bzc0NycjJu3rwJX19fAMD777+v7PvkeTKZDHv27MGIESMgFAoxZMgQrFmzBmVlZSrXmjBhAr766iu88sorcHd3x8iRI/HTTz899XNMSUlBYGAgevXqhQEDBiAiIgJFRUUAqpY1Pl6OOH78eK2W0F66dAlTp05F79690bt3b8yYMQM3btwAAJSVlcHT0xMrVqxQOUcul2PgwIFYsmSJsu3LL7/Eq6++Cnd3dwwZMgQbNmxAZWWlxnE89ueff+Ltt99Gjx494Ofnh927dyuPvfvuuxg8eDDkcrnKOREREfD19a3x3r78/HwsWbIEPj4+cHd3R9++fTFjxgzcvHlT2efJJZ1ubm7YuHEj3njjDfTp0wexsbHVjj1u3DgsXLgQW7ZswYABA9C7d29Mnz5d+Rk+9ttvvyEkJATe3t7o3bs3pk2bhsuXL2v8uVS35PSfy1tPnTqlnAF/5513lH2fPK+srAybNm3CsGHD0KNHD7z88svYunWrymc6btw4fPjhh9i6dSuGDBmCHj16YMyYMcjMzNQ4XiKiJ7FwJCKiWhkbG0MkEuHcuXPVFhLp6emYN28eXn75ZWzbtg3vv/8+0tLS8N577wGoWmrXvXt3dO/eHYmJifjXv/6lPHft2rUIDw9HeHi42mznY4cOHUJWVhZWrlyJBQsW4OjRowgNDdU4/tatW2Pjxo0AgOnTpyv//qSIiAisWLECYrEYmzdvRnBwMD799FOEhoaqFDRZWVmQSCR49913sWnTJpiYmODdd99VFoHViY2NxZw5c9CzZ0+sX78eM2bMwHfffYdx48bh0aNHeOuttxAREaGMo6YYn/TXX39hzJgxyMvLw8qVK7F8+XLcuHEDQUFByMvLg7m5OV555RUcOnRIpb
A4deoU7t27h9dffx0AsGXLFixevBgikQhxcXEIDg7Gtm3blDFpIzIyEj179kRsbCwGDRqEZcuW4YsvvgAAvPnmm7hz547KfZDl5eU4dOgQAgIClEuh/0mhUGDq1KlITU3Fe++9B4lEgtDQUJw4ceKp8W3evBmvvPIKoqOjlT8eVOeHH37AV199hQ8//BBLly7FH3/8gXfeeQcPHjwAAKSlpSEoKAhyuRzLly/HsmXLkJOTgzFjxiA7O1vrz6g6//rXv1RyoLolqgqFAtOmTUN8fDzefPNNxMXFYdiwYVi3bp1a/++++w4//PADFi1ahOjoaNy/fx/vvvuu2v3KRESa4lJVIiJ6qpYtW6KiogKFhYVo2bKlyrH09HSYm5tjypQpMDc3BwA0b94cv/32GxQKBV588UXl/ZBPFodjxozBsGHDar22nZ0d4uPjlWPY29tjxowZOH78OAYOHPjU2M3MzNCtWzcAVctTq1tm++effyIpKQmzZ8/G9OnTAQADBgxA69atsWDBAhw7dgyDBw8GAEilUiQnJyuXulpZWWHs2LFIS0vDK6+8ojZ2UVERNm/ejLfeekvlP+67dOmC4OBgJCcn4+2331YuSXzxxRc1Xgq8ceNGWFhYICEhQfn5iEQiDB06FPHx8QgPD8frr7+OpKQknDlzBn379gUAHDhwAK6urvDw8IBUKsXmzZsxevRoLFq0CAAwcOBANG/eHIsWLcLEiRPRuXNnjeIBgMDAQOVGSIMGDUJubi42bdqEN998EwMHDkSbNm2wf/9+iEQiAMCRI0cglUoREBBQ7Xh3796FpaUlwsPDlUt4vb29cfPmTZVZ8OoIhUL8+9//fmrMDx48wFdffaX8Tjt27IiAgADs27cPwcHBiIqKgouLC+Lj42FsbAyg6jPy8/PDhg0bsG7dOo0+m9rY2Nio5EB1S1SPHTuGEydO4D//+Y9y2fiAAQNgYWGBmJgYjB8/XnleZWUlJBKJMi9KS0sRHh6OCxcuwN3dvd7xElHTwxlHIiLSWHUzQl5eXnj06BFGjBiBtWvXIj09HQMHDkRYWFi1/f9Jk10nBw8erLIRj1gshqmpKU6cOKH9G6jB6dOnAQAjRoxQaX/11VdhbGysMkPm4OCgcn9kmzZtAAAPHz6sduyMjAyUl5erje3p6YkXXnihXruQpqWlwdvbGxYWFqisrERlZSVsbGzg6emp/Hy8vLzwwgsv4ODBgwCqZvi+//57ZeHx66+/4uHDhxCLxcoxKisrlctlU1NTtYpp+PDhKq/9/Pxw584dXLlyBUZGRggICMDhw4eVn9e+ffvg7e2NF154odrxHB0dsWvXLnh6euL27ds4efIkPv30U5w9exYVFRW1xtKlSxeNYu7Vq5fKd9q9e3e4uLjgzJkzePDgAX777TcMHz5cWTQCVT9o+Pj4NOgusqdPn4axsbHaZ/z4u/xnLP/8wQao+hyBmvOUiOhpOONIRERPlZubCwsLCzRv3lztWK9evbB161YkJCRAIpEgLi4OrVq1wpQpUzB+/Phax23RosVTr/3kDKeRkRGaN2+O4uJird5DbR4vM23VqpVKu4mJCezt7SGVSpVtlpaWKn0eF8dP3rf35NhPvo/Hbf8cW1uFhYVISUlBSkqK2jEHBwdlfCNGjMAXX3yBxYsX49ixYyguLlYuU328KU9NM3OP71XV1JOf4ePv+PHn8MYbbyAuLg6HDx9G//79kZqaqrLbbXW+/vprREdHIycnB82bN0fXrl1hYWHx1Fiq+8yr07p1a7W2Fi1aoLi4GFKpFAqFQi/fn7aKiopgb28PExPV/3x7/JnXlqdGRlVzBTXlKRHR07BwJCKiWslkMpw+fRq9e/dWmXH5p0GDBmHQoEF4+PAh0tLSsGvXLqxYsQIeHh7o2bNnva7/ZIEok8lQUFCgLEgEAoHafVuP703TVLNmzQAA9+7dg7Ozs7K9oqICBQUFsLe3r0voKmPfv38fnTp1Ujl27949uL
i41HlsW1tb9O/fHxMnTlQ79s/i4vXXX0dcXBxOnTqFb775Br1791Ze187ODkDVMyTbt2+vNo6mxddjT97ref/+fQB
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count patients for every (air pollution score, risk level) combination.\n",
"air_pollution = dane.groupby(['Air Pollution', 'Level']).size()\n",
"\n",
"# Order by air pollution score, then by risk level from High to Low.\n",
"# sort_index applies `key` to each index level separately, so values that are not risk\n",
"# labels (the numeric pollution scores) must pass through unchanged; mapping them through\n",
"# the dict directly would turn that whole level into NaN and drop it from the ordering.\n",
"level_rank = {'High': 1, 'Medium': 2, 'Low': 3}\n",
"air_pollution_sorted = air_pollution.sort_index(\n",
"    level=['Air Pollution', 'Level'],\n",
"    key=lambda idx: idx.map(lambda v: level_rank.get(v, v)),\n",
")\n",
"\n",
"# Horizontal bar chart, one bar per (score, level) pair.\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"air_pollution_sorted.plot(kind='barh', ax=ax)\n",
"\n",
"# Write each count just to the right of the end of its bar.\n",
"for i, value in enumerate(air_pollution_sorted):\n",
"    ax.text(value + 0.1, i, str(value), ha='left', va='center')\n",
"\n",
"# Axis labels and title.\n",
"ax.set_xlabel('Count')\n",
"ax.set_ylabel('Air Pollution, Level')\n",
"ax.set_title('Distribution of level by air pollution')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "402f9c5a",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA6EAAANBCAYAAAARH7ioAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xP5///8UemiMRKQ4zUFpsQYo+oImpWVRG1aY2iqFRrxygaq1ZFjagdm5ZSKa3G6Meo2aZWjBixUtnJ7w8/72/TRAR5nyjP++32vt2Sc673eV7n5G28cl3nOhZJSUlJiIiIiIiIiBjAMrM7ICIiIiIiIq8OFaEiIiIiIiJiGBWhIiIiIiIiYhgVoSIiIiIiImIYFaEiIiIiIiJiGBWhIiIiIiIiYhgVoSIiIiIiImIYFaEiIiIiIiJiGBWhIiKSqZKSkjK7C5JBXuaf5ct8biIiRlMRKiLyihs+fDheXl6P3e/j44OPj0+G58bGxjJx4kQ2b96c4cfODNu2baNBgwaUL1+ekSNHptl2//799OvXjzp16lCxYkUaN27MpEmTuHnzpkG9fbywsDDc3NwICgp6qvfNnTuXgIAA0/ezZs3Czc0to7uXqkdZ5cuXJzIyMtU2K1aswM3NLc3PemquXbtG7969uXz5cprtQkJCcHNzIyQk5KmOLyLyKlIRKiIimeL69essXryY+Pj4zO5KhhgzZgx58uRh4cKFdOvW7bHtpk2bRteuXbG2tmbEiBHMnz+fjh07snXrVtq1a/fEYudFNX36dKKiokzfv/POO6xatcrQPsTHx7Nr165U923btu2ZjvnLL7+wZ8+eJ7YrW7Ysq1atomzZss+UIyLyKrHO7A6IiIi8DO7cuUOtWrXw9PR8bJtt27axYMECfH196dKli2l79erVqV+/Pq1atWLcuHHMmzfPgB6bl4uLCy4uLoZmVq5cme3bt9OyZctk28PDwzl06BClS5fm3r17Zsl2cHCgUqVKZjm2iMjLRiOhIiLy1H744QfatGlD+fLlqVWrFuPHj+fBgwcp2nTo0AF3d3fKlStHkyZNCAwMBB5O+WzYsCEAvr6+pimSw4cPp3v37qxevZo33niDChUq0L59e86dO8ePP/5I8+bNqVixIu+88w6nTp1KlrdmzRratGlDpUqVqFChAi1btkw2+hUUFISbmxtHjx6ldevWVKhQgebNm6drhOz48eN0794dT09PKleuTJ8+ffjjjz+A/5uGCfDVV1/h5uZGWFhYqseZP38+xYsX5/3330+x7/XXX2fYsGFUqVKFxMREABISEli+fDnNmzenQoUK1K9fn6lTpxITE2N63/Dhw3n//fcZNWoUHh4etG7dmvj4eNzc3Jg9ezZvv/02VapUYc6cOQBcuXKFwYMHU61aNSpWrMj777/PyZMn0zz/gwcP0r17d6pWrUq5cuXw8vJi1qxZpn4+Ov/Zs2ebvk5tOu62bdto06YN7u7u1KpVi5EjR3L37l3T/lmzZtGoUSP27NlD8+bNKVeuHI0bN2b9+vVp9u8Rb29vfv75Z+7fv59s+3fffUeRIkUoVapUsu0JCQksWLCAt956iwoVKlCpUiXat2/P/v37gYefGV9fXwAaNmzI8OHDAfDy8mLChAm8//77VK5cmZEjRyabjhsZGYmXlxdNmjQhNjYWeHhPabdu3ahRo8YLMe1aRCQzqQgVERHg4VTG1F7/XpBl8+bN9O3bl6JFi/LVV1/Rr18/Nm3axIcffmhqu2fPHvr27UvZsmWZM2cOs2bNokCBAowbN47ffvuNPHnyMHv2bAA++OAD09cAR44cYdmyZQwfPpwJEybw559/0qtXLyZOnEjv3r2ZOHEiV69eZciQIab3LF++nJEjR9KwYUPmz5/PlClTsLGxYejQoVy5ciVZ/3v37k3Dhg2ZPXs2RYoUYfDgwY+dwgnw66+/8t5775GYmIifnx/jx4/n6tWrtG/fntDQUN
M0TIC2bduyatUq8uTJk+I4N27c4PTp09SvXx8LC4tUs9q3b0/Pnj2xtHz4z/PIkSOZMGECXl5ezJ07l44dOxIYGJjsWgMcOnSICxcuMGvWLPr27Yu19cOJTnPnzqVx48Z8+eWXNGzYkIiICNq3b8+JEyf4/PPPmTZtGomJiXTs2JHQ0NBU+3T69Gm6dOlCzpw58ff3Z+7cuVSuXJnZs2ezdetWgBTnn5o5c+YwaNAgKlasyMyZM+nbty/ff/89Pj4+REdHJ7tOY8eOpXPnzixYsICCBQsyfPjwx/bvnxo3bkxCQkKKn+e2bdto1qxZivZTp07lq6++4t1332XhwoWMHTuW27dv89FHH/HgwQPq16/PBx98ADwssD/88EPTe5cvX46bmxuzZs1KMfLq4OCAn58f58+fN41qf/vtt/z888/4+fnx2muvPfFcREReZpqOKyIiXL58Oc172apVqwY8HM2ZOnUqderUYerUqab9hQsXpkuXLgQHB1O/fn3+/PNPWrVqxYgRI0xt3N3d8fT05ODBg1SuXJnSpUsDD0cAy5QpY2oXGRnJ9OnTKVasGAAHDhxg1apVLF68mBo1agAPF4uZPHky9+7dI3v27Fy6dIlu3brRt29f03EKFixImzZt+O2338ifP79pe6dOnejXrx8AderUoXXr1syZM8c0Mvtv06ZNw9XVlYULF2JlZQVA7dq1adSoEbNmzWL69OmmaZguLi6PnZJ57do1U7/S488//2Tt2rUMHDjQVAjVqlWLPHnyMGzYMH766Sfq1asHPPwFwpgxYyhUqFCyY1SoUIFevXqZvvf39+fOnTusWLGCAgUKAFC3bl28vb2ZMWMGM2fOTNGP06dPU7NmTaZMmWIqjmvVqsWePXs4ePAgzZs3f+L53717l7lz5/LOO+8watQo0/aSJUvSsWNHgoKC6NChAwBRUVH4+fmZftaFCxemQYMGBAcHmz4Tj/Paa69RtWpVvvvuO1q1agU8/GwfPXqUyZMnp5jmfP36dQYNGpRs4S07Ozv69+/PmTNncHd35/XXXwegdOnSyX52efLkYfjw4aZr8u8FiWrUqEGHDh1YsGABlSpVYurUqbz77rtPvTCSiMjLSEWoiIjg7OzM3LlzU933z6Lhr7/+Mq0W+s8FhapWrYqDgwM///wz9evXp0ePHgA8ePCAixcvcu7cOY4fPw5AXFxcmn3JkSNHsmLD2dkZIFlxkzNnTgBTEfpomuT9+/c5f/4858+fN02p/HfeP0etLCwsTMVkVFQUWbNmTdb2wYMHHD9+nL59+5oKUIDs2bObCqP0elSsPJrC+iQHDhwAoHnz5sm2N2vWDF9fX0JCQkxFqJ2dnalY+qeSJUsm+37//v2ULl2avHnzmn5+lpaW1K1bl02bNqXaj1atWtGqVStiYmK4ePEiFy5c4MSJEyQkJDzxZ/nIkSNHiI2NTXEuHh4eFChQgJCQEFMRCsl/1o/uK/33dO/H8fb2Zty4cdy/fx9HR0e2bt1K2bJlKVy4cIq206ZNAyAiIoILFy5w7tw5du/eDTz5c1qsWDHTz/RxhgwZwt69e+nduzevv/66aWqviMirTkWoiIhga2tL+fLlU92XLVs209d37twBHq4EO2bMmBRtr1+/Djz8T/2oUaP44YcfsLCwoFChQlSpUgV48vMWHRwcUt3+7wLxny5evMjIkSP59ddfsba2pmjRoqb7Ef+dlzdv3mTfOzk5kZSUxP3791Nk3L9/n6SkpFSnT7722msp7j1MS758+bCwsEhz9dt79+5hZWVFtmzZTPdKPirCH7G2tiZXrlzJsp2cnFKd4vvvft+5c4cLFy48dtT7n6vbPhIdHc24cePYuHEj8fHxFCxYEHd3d6ytrdP97MxH55Le6/jPn8OjQi+9WW+++SZjx47lhx9+oHXr1mzfvj1F8fvI8ePHGTNmDMePH8fOzo7ixYubRoiflJeeKbX29vY0btyYr7/+murVq6f5GRYReZ
WoCBURkXTLnj07AMOGDTNN0f2nHDlyAA9HgEJDQ/nmm2+oXLkytra2REVFsWbNmgzvU2JiIr169cLGxobVq1dTpkw
"text/plain": [
"<Figure size 1000x800 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Encode the categorical columns as integers before computing correlations.\n",
"dane['Gender'] = dane['Gender'].replace({'Man': 1, 'Woman': 2})\n",
"level_codes = {'High': 3, 'Medium': 2, 'Low': 1}\n",
"data = dane.replace({'Level': level_codes})\n",
"data = data.drop(columns=['Patient Id'])\n",
"\n",
"# Pairwise correlation matrix of the encoded frame.\n",
"corr_matrix = data.corr()\n",
"\n",
"# Heatmap of the matrix, without per-cell annotations.\n",
"plt.figure(figsize=(10, 8))\n",
"sns.heatmap(corr_matrix, cmap='coolwarm', annot=False, fmt='.2f')\n",
"plt.title('Heatmap of Correlation Matrix')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "00915ec0",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import sklearn "
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "f02e1f34",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Patient Id</th>\n",
" <th>Age</th>\n",
" <th>Gender</th>\n",
" <th>Air Pollution</th>\n",
" <th>Alcohol use</th>\n",
" <th>Dust Allergy</th>\n",
" <th>OccuPational Hazards</th>\n",
" <th>Genetic Risk</th>\n",
" <th>chronic Lung Disease</th>\n",
" <th>Balanced Diet</th>\n",
" <th>...</th>\n",
" <th>Fatigue</th>\n",
" <th>Weight Loss</th>\n",
" <th>Shortness of Breath</th>\n",
" <th>Wheezing</th>\n",
" <th>Swallowing Difficulty</th>\n",
" <th>Clubbing of Finger Nails</th>\n",
" <th>Frequent Cold</th>\n",
" <th>Dry Cough</th>\n",
" <th>Snoring</th>\n",
" <th>Level</th>\n",
" </tr>\n",
" <tr>\n",
" <th>index</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>P1</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>P10</td>\n",
" <td>17</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>P100</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>P1000</td>\n",
" <td>37</td>\n",
" <td>1</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>P101</td>\n",
" <td>46</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 25 columns</p>\n",
"</div>"
],
"text/plain": [
" Patient Id Age Gender Air Pollution Alcohol use Dust Allergy \\\n",
"index \n",
"0 P1 33 1 2 4 5 \n",
"1 P10 17 1 3 1 5 \n",
"2 P100 35 1 4 5 6 \n",
"3 P1000 37 1 7 7 7 \n",
"4 P101 46 1 6 8 7 \n",
"\n",
" OccuPational Hazards Genetic Risk chronic Lung Disease \\\n",
"index \n",
"0 4 3 2 \n",
"1 3 4 2 \n",
"2 5 5 4 \n",
"3 7 6 7 \n",
"4 7 7 6 \n",
"\n",
" Balanced Diet ... Fatigue Weight Loss Shortness of Breath \\\n",
"index ... \n",
"0 2 ... 3 4 2 \n",
"1 2 ... 1 3 7 \n",
"2 6 ... 8 7 9 \n",
"3 7 ... 4 2 3 \n",
"4 7 ... 3 2 4 \n",
"\n",
" Wheezing Swallowing Difficulty Clubbing of Finger Nails \\\n",
"index \n",
"0 2 3 1 \n",
"1 8 6 2 \n",
"2 2 1 4 \n",
"3 1 4 5 \n",
"4 1 4 2 \n",
"\n",
" Frequent Cold Dry Cough Snoring Level \n",
"index \n",
"0 2 3 4 1 \n",
"1 1 7 2 2 \n",
"2 6 7 2 3 \n",
"3 6 7 5 3 \n",
"4 4 2 3 3 \n",
"\n",
"[5 rows x 25 columns]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Modelling frame: a copy of `dane` with Level encoded as Low = 1, Medium = 2, High = 3.\n",
"level_to_code = {'Low': 1, 'Medium': 2, 'High': 3}\n",
"data = dane.replace({'Level': level_to_code})\n",
"data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "52632684",
"metadata": {},
"outputs": [],
"source": [
"import sklearn"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "a47f580a",
"metadata": {},
"outputs": [],
"source": [
"# Print arrays with 6 decimals and without scientific notation, and seed NumPy's global RNG.\n",
"np.set_printoptions(suppress=True, precision=6)\n",
"np.random.seed(10)\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "7caae544",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Y shape: (1000,)\n",
"X shape: (1000, 23)\n"
]
}
],
"source": [
"# Separate the target column from the features; the patient identifier is not a feature.\n",
"target_column = 'Level'\n",
"y = data[target_column]\n",
"X = data.drop(columns=[target_column, 'Patient Id'])\n",
"\n",
"print('Y shape:', y.shape)\n",
"print('X shape:', X.shape)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "9139408a",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Hold out part of the rows for evaluation (scikit-learn's default test_size is 0.25).\n",
"# The seed is passed explicitly so that re-running this cell on its own always yields the\n",
"# same split instead of depending on the current state of NumPy's global RNG; 10 is the\n",
"# value the notebook also uses for np.random.seed.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "2f45152a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X_train shape: (750, 23)\n",
"y_train shape: (750,)\n",
"X_test shape: (250, 23)\n",
"y_test shape: (250,)\n"
]
}
],
"source": [
"# Report the dimensions of every piece of the train/test split.\n",
"split_parts = [('X_train', X_train), ('y_train', y_train), ('X_test', X_test), ('y_test', y_test)]\n",
"for label, part in split_parts:\n",
"    print(f'{label} shape:', part.shape)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "8ba2674d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\HP\\anaconda3\\lib\\site-packages\\sklearn\\linear_model\\_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n"
]
},
{
"data": {
"text/html": [
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-r
],
"text/plain": [
"LogisticRegression(max_iter=200)"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# The features are fed in unscaled, and with max_iter=200 the lbfgs solver stopped at the\n",
"# iteration limit (ConvergenceWarning). Give it a larger budget; the solver still stops as\n",
"# soon as it has converged, so the limit only matters when it is too small.\n",
"classifier = LogisticRegression(max_iter=5000)\n",
"classifier.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "ba0a5bda",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0.01354 , 0.98631 , 0.00015 ],\n",
" [0. , 0.000005, 0.999995],\n",
" [0.000005, 0.00068 , 0.999315],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.000003, 0.000947, 0.99905 ],\n",
" [0.001809, 0.982182, 0.016009],\n",
" [0. , 0.012048, 0.987952],\n",
" [0.059547, 0.831086, 0.109367],\n",
" [0. , 0.075388, 0.924612],\n",
" [0.039136, 0.960851, 0.000013],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.006091, 0.993801, 0.000108],\n",
" [0.017895, 0.982104, 0.000001],\n",
" [0.998827, 0.001173, 0. ],\n",
" [0.928696, 0.070643, 0.000662],\n",
" [0.033735, 0.966263, 0.000003],\n",
" [0. , 0.000113, 0.999887],\n",
" [0.934007, 0.009674, 0.056319],\n",
" [0. , 0.000031, 0.999969],\n",
" [0.999648, 0.000306, 0.000047],\n",
" [0.000004, 0.00404 , 0.995956],\n",
" [0.999251, 0.000216, 0.000533],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0. , 0.004695, 0.995305],\n",
" [0. , 0.000124, 0.999876],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.059547, 0.831086, 0.109367],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0.999909, 0.00009 , 0.000001],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.976724, 0.023247, 0.000028],\n",
" [0.008947, 0.990345, 0.000708],\n",
" [0. , 0.004695, 0.995305],\n",
" [0.999251, 0.000216, 0.000533],\n",
" [0.005118, 0.994787, 0.000095],\n",
" [0. , 0.00008 , 0.99992 ],\n",
" [0.015368, 0.98458 , 0.000052],\n",
" [0. , 0.000003, 0.999997],\n",
" [0.00583 , 0.994147, 0.000023],\n",
" [0.993283, 0.00006 , 0.006656],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.004655, 0.989938, 0.005407],\n",
" [0. , 0.000134, 0.999866],\n",
" [0.000007, 0.999991, 0.000002],\n",
" [0. , 0.012048, 0.987952],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0. , 0.000031, 0.999969],\n",
" [0. , 0.000165, 0.999835],\n",
" [0.009961, 0.988549, 0.00149 ],\n",
" [0.006484, 0.974177, 0.019339],\n",
" [0.00523 , 0.994198, 0.000572],\n",
" [0.021549, 0.975853, 0.002598],\n",
" [0. , 0.000012, 0.999988],\n",
" [0.009961, 0.988549, 0.00149 ],\n",
" [0.000001, 0.000025, 0.999974],\n",
" [0. , 0.016222, 0.983778],\n",
" [0.00003 , 0.010307, 0.989664],\n",
" [0.993286, 0.006713, 0.000001],\n",
" [0.000175, 0.040597, 0.959228],\n",
" [0.000004, 0.00404 , 0.995956],\n",
" [0.984367, 0.012436, 0.003197],\n",
" [0.000005, 0.00068 , 0.999315],\n",
" [0.967662, 0.024355, 0.007983],\n",
" [0. , 0.000134, 0.999866],\n",
" [0.976724, 0.023247, 0.000028],\n",
" [0.007089, 0.99289 , 0.000021],\n",
" [0. , 0.000165, 0.999835],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.016788, 0.983198, 0.000014],\n",
" [0.988196, 0.002318, 0.009486],\n",
" [0.999947, 0.000021, 0.000032],\n",
" [0.990009, 0.009991, 0. ],\n",
" [0.016788, 0.983198, 0.000014],\n",
" [0.015399, 0.984596, 0.000005],\n",
" [0.022511, 0.975873, 0.001616],\n",
" [0.008947, 0.990345, 0.000708],\n",
" [0.007156, 0.985278, 0.007566],\n",
" [0. , 0.00068 , 0.99932 ],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0. , 0.000106, 0.999894],\n",
" [0.993286, 0.006713, 0.000001],\n",
" [0. , 0.004695, 0.995305],\n",
" [0.006484, 0.974177, 0.019339],\n",
" [0. , 0.00001 , 0.99999 ],\n",
" [0. , 0.03253 , 0.967469],\n",
" [0. , 0.000014, 0.999986],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0. , 0.00068 , 0.99932 ],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0.003605, 0.971443, 0.024952],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0.00583 , 0.994147, 0.000023],\n",
" [0.00583 , 0.994147, 0.000023],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.022062, 0.977641, 0.000297],\n",
" [0.016788, 0.983198, 0.000014],\n",
" [0.957888, 0.04211 , 0.000001],\n",
" [0. , 0.016222, 0.983778],\n",
" [0.000003, 0.999994, 0.000003],\n",
" [0.9999 , 0.0001 , 0. ],\n",
" [0. , 0.012048, 0.987952],\n",
" [0.999909, 0.00009 , 0.000001],\n",
" [0.928696, 0.070643, 0.000662],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0.000003, 0.99999 , 0.000007],\n",
" [0. , 0.004695, 0.995305],\n",
" [0. , 0.000014, 0.999986],\n",
" [0.993283, 0.00006 , 0.006656],\n",
" [0.000003, 0.000947, 0.99905 ],\n",
" [0.008109, 0.979381, 0.01251 ],\n",
" [0.934007, 0.009674, 0.056319],\n",
" [0.999648, 0.000306, 0.000047],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.993286, 0.006713, 0.000001],\n",
" [0.9999 , 0.0001 , 0. ],\n",
" [0. , 0.000113, 0.999887],\n",
" [0. , 0.000063, 0.999937],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.011601, 0.988358, 0.000041],\n",
" [0.999909, 0.00009 , 0.000001],\n",
" [0.000005, 0.00068 , 0.999315],\n",
" [0.000175, 0.040597, 0.959228],\n",
" [0.990009, 0.009991, 0. ],\n",
" [0.978169, 0.021712, 0.000119],\n",
" [0.015368, 0.98458 , 0.000052],\n",
" [0.022062, 0.977641, 0.000297],\n",
" [0. , 0.021094, 0.978906],\n",
" [0.973999, 0.013972, 0.012029],\n",
" [0. , 0.004695, 0.995305],\n",
" [0.996352, 0.003647, 0.000001],\n",
" [0.967662, 0.024355, 0.007983],\n",
" [0.985581, 0.000103, 0.014316],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.984367, 0.012436, 0.003197],\n",
" [0. , 0.000468, 0.999532],\n",
" [0.008109, 0.979381, 0.01251 ],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.985581, 0.000103, 0.014316],\n",
" [0.007156, 0.985278, 0.007566],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.999947, 0.000021, 0.000032],\n",
" [0.973999, 0.013972, 0.012029],\n",
" [0.000022, 0.003654, 0.996323],\n",
" [0.993286, 0.006713, 0.000001],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.008803, 0.991195, 0.000002],\n",
" [0.008947, 0.990345, 0.000708],\n",
" [0. , 0.002567, 0.997433],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0. , 0.004695, 0.995305],\n",
" [0. , 0.000013, 0.999987],\n",
" [0.98178 , 0.01822 , 0. ],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0.006622, 0.993257, 0.000122],\n",
" [0.972232, 0.027758, 0.00001 ],\n",
" [0.008109, 0.979381, 0.01251 ],\n",
" [0.928696, 0.070643, 0.000662],\n",
" [0. , 0.000196, 0.999804],\n",
" [0.000003, 0.999994, 0.000003],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0.021549, 0.975853, 0.002598],\n",
" [0.999947, 0.000021, 0.000032],\n",
" [0. , 0.000134, 0.999866],\n",
" [0.013247, 0.986728, 0.000025],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0. , 0.00512 , 0.99488 ],\n",
" [0.153033, 0.8185 , 0.028467],\n",
" [0.008947, 0.990345, 0.000708],\n",
" [0.009961, 0.988549, 0.00149 ],\n",
" [0.015399, 0.984596, 0.000005],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.022062, 0.977641, 0.000297],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.967662, 0.024355, 0.007983],\n",
" [0.006622, 0.993257, 0.000122],\n",
" [0.015399, 0.984596, 0.000005],\n",
" [0. , 0.000196, 0.999804],\n",
" [0. , 0.016222, 0.983778],\n",
" [0. , 0.000124, 0.999876],\n",
" [0. , 0.001328, 0.998672],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.022511, 0.975873, 0.001616],\n",
" [0.999251, 0.000216, 0.000533],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0. , 0.03253 , 0.967469],\n",
" [0. , 0.000063, 0.999937],\n",
" [0.993283, 0.00006 , 0.006656],\n",
" [0.007089, 0.99289 , 0.000021],\n",
" [0.009961, 0.988549, 0.00149 ],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0.007156, 0.985278, 0.007566],\n",
" [0.006622, 0.993257, 0.000122],\n",
" [0.021549, 0.975853, 0.002598],\n",
" [0. , 0.000003, 0.999997],\n",
" [0.009393, 0.990606, 0.000001],\n",
" [0. , 0.001767, 0.998233],\n",
" [0.000175, 0.040597, 0.959228],\n",
" [0.931524, 0.068283, 0.000192],\n",
" [0.01354 , 0.98631 , 0.00015 ],\n",
" [0.000011, 0.999989, 0. ],\n",
" [0.07487 , 0.902033, 0.023097],\n",
" [0. , 0.000113, 0.999887],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0. , 0.000031, 0.999969],\n",
" [0. , 0.0011 , 0.9989 ],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0.957888, 0.04211 , 0.000001],\n",
" [0.004898, 0.993937, 0.001166],\n",
" [0. , 0. , 1. ],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0.990009, 0.009991, 0. ],\n",
" [0.976724, 0.023247, 0.000028],\n",
" [0.000007, 0.999991, 0.000002],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0. , 0.000014, 0.999986],\n",
" [0.015399, 0.984596, 0.000005],\n",
" [0.003605, 0.971443, 0.024952],\n",
" [0.1259 , 0.859667, 0.014433],\n",
" [0.991172, 0.008807, 0.000021],\n",
" [0. , 0.00001 , 0.99999 ],\n",
" [0.004655, 0.989938, 0.005407],\n",
" [0. , 0.000063, 0.999937],\n",
" [0.999999, 0.000001, 0. ],\n",
" [0.007156, 0.985278, 0.007566],\n",
" [0.000007, 0.999991, 0.000002],\n",
" [0. , 0.001638, 0.998362],\n",
" [0.006622, 0.993257, 0.000122],\n",
" [0. , 0.000468, 0.999532],\n",
" [0.978169, 0.021712, 0.000119],\n",
" [0.891574, 0.108425, 0. ],\n",
" [0. , 0.000196, 0.999804],\n",
" [0.934007, 0.009674, 0.056319],\n",
" [0.008109, 0.979381, 0.01251 ],\n",
" [0.000004, 0.00404 , 0.995956],\n",
" [0.022511, 0.975873, 0.001616],\n",
" [0. , 0.03253 , 0.967469],\n",
" [0.00007 , 0.024082, 0.975848],\n",
" [0. , 0.000031, 0.999969],\n",
" [0. , 0.012048, 0.987952],\n",
" [0.999947, 0.000021, 0.000032],\n",
" [0.004898, 0.993937, 0.001166]])"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Class-membership probabilities for the held-out rows; the columns follow classifier.classes_\n",
"# (the encoded levels 1 = Low, 2 = Medium, 3 = High).\n",
"y_prob = classifier.predict_proba(X_test)\n",
"\n",
"# Preview only the first rows instead of printing the whole array.\n",
"y_prob[:10]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "08f121e4",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'X_test' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[3], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m y_pred \u001b[38;5;241m=\u001b[39m classifier\u001b[38;5;241m.\u001b[39mpredict(\u001b[43mX_test\u001b[49m)\n\u001b[0;32m 2\u001b[0m y_pred\n",
"\u001b[1;31mNameError\u001b[0m: name 'X_test' is not defined"
]
}
],
"source": [
"# Hard class predictions for the held-out rows, using the 1 = Low, 2 = Medium, 3 = High encoding.\n",
"y_pred = classifier.predict(X=X_test)\n",
"y_pred"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "322588b1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: mlxtend in c:\\users\\hp\\anaconda3\\lib\\site-packages (0.23.0)\n",
"Requirement already satisfied: scipy>=1.2.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (1.11.4)\n",
"Requirement already satisfied: numpy>=1.16.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (1.26.0)\n",
"Requirement already satisfied: pandas>=0.24.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (2.1.1)\n",
"Requirement already satisfied: scikit-learn>=1.0.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (1.3.2)\n",
"Requirement already satisfied: matplotlib>=3.0.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (3.8.0)\n",
"Requirement already satisfied: joblib>=0.13.2 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from mlxtend) (1.3.2)\n",
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (1.2.0)\n",
"Requirement already satisfied: cycler>=0.10 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (0.12.1)\n",
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (4.25.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (1.4.4)\n",
"Requirement already satisfied: packaging>=20.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (23.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (10.0.1)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (3.0.9)\n",
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from matplotlib>=3.0.0->mlxtend) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas>=0.24.2->mlxtend) (2023.3.post1)\n",
"Requirement already satisfied: tzdata>=2022.1 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from pandas>=0.24.2->mlxtend) (2023.3)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from scikit-learn>=1.0.2->mlxtend) (3.2.0)\n",
"Requirement already satisfied: six>=1.5 in c:\\users\\hp\\anaconda3\\lib\\site-packages (from python-dateutil>=2.7->matplotlib>=3.0.0->mlxtend) (1.16.0)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"[notice] A new release of pip is available: 23.3.2 -> 24.0\n",
"[notice] To update, run: python.exe -m pip install --upgrade pip\n"
]
}
],
"source": [
"%pip install mlxtend==0.23.0"
]
},
{
"cell_type": "code",
"execution_count": 446,
"id": "c876fff8",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"from sklearn.metrics import accuracy_score\n",
"\n",
"from mlxtend.plotting import plot_confusion_matrix\n",
"import seaborn as sns\n",
"sns.set()\n"
]
},
{
"cell_type": "code",
"execution_count": 447,
"id": "cbb6c719",
"metadata": {},
"outputs": [],
"source": [
"#cm = confusion_matrix(y_test, y_pred)\n",
"#plot_confusion_matrix(cm)\n",
"\n",
"#acc = accuracy_score(y_test, y_pred)\n",
"#print('Accuracy',':', acc)"
]
},
{
"cell_type": "code",
"execution_count": 448,
"id": "e3c2afe8",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"colorscale": [
[
0,
"rgb(3, 5, 18)"
],
[
0.09090909090909091,
"rgb(25, 25, 51)"
],
[
0.18181818181818182,
"rgb(44, 42, 87)"
],
[
0.2727272727272727,
"rgb(58, 60, 125)"
],
[
0.36363636363636365,
"rgb(62, 83, 160)"
],
[
0.45454545454545453,
"rgb(62, 109, 178)"
],
[
0.5454545454545454,
"rgb(72, 134, 187)"
],
[
0.6363636363636364,
"rgb(89, 159, 196)"
],
[
0.7272727272727273,
"rgb(114, 184, 205)"
],
[
0.8181818181818182,
"rgb(149, 207, 216)"
],
[
0.9090909090909091,
"rgb(192, 229, 232)"
],
[
1,
"rgb(234, 252, 253)"
]
],
"reversescale": true,
"showscale": true,
"type": "heatmap",
"x": [
"pred_1",
"pred_2",
"pred_3"
],
"y": [
"true_3",
"true_2",
"true_1"
],
"z": [
[
0,
0,
85
],
[
0,
89,
0
],
[
76,
0,
0
]
]
}
],
"layout": {
"annotations": [
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_1",
"xref": "x",
"y": "true_3",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_2",
"xref": "x",
"y": "true_3",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "85",
"x": "pred_3",
"xref": "x",
"y": "true_3",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_1",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "89",
"x": "pred_2",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_3",
"xref": "x",
"y": "true_2",
"yref": "y"
},
{
"font": {
"color": "#FFFFFF"
},
"showarrow": false,
"text": "76",
"x": "pred_1",
"xref": "x",
"y": "true_1",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_2",
"xref": "x",
"y": "true_1",
"yref": "y"
},
{
"font": {
"color": "#000000"
},
"showarrow": false,
"text": "0",
"x": "pred_3",
"xref": "x",
"y": "true_1",
"yref": "y"
}
],
"font": {
"size": 16
},
"height": 500,
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Confusion Matrix - Accuracy: 0.9920"
},
"width": 500,
"xaxis": {
"dtick": 1,
"gridcolor": "rgb(0, 0, 0)",
"side": "top",
"ticks": ""
},
"yaxis": {
"dtick": 1,
"ticks": "",
"ticksuffix": " "
}
}
}
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"cm = confusion_matrix(y_test, y_pred)\n",
"\n",
"# NOTE: this definition shadows the `plot_confusion_matrix` imported from mlxtend.plotting.\n",
"def plot_confusion_matrix(cm, acc=None):\n",
"    \"\"\"Show a confusion matrix as an annotated Plotly heatmap.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    cm : square array of counts\n",
"        Rows are true classes, columns are predicted classes (the layout\n",
"        produced by sklearn.metrics.confusion_matrix).\n",
"    acc : float, optional\n",
"        Accuracy shown in the figure title. When omitted it is derived from\n",
"        `cm` as the diagonal sum divided by the total count, so the title\n",
"        cannot go stale or depend on a global defined in another cell.\n",
"    \"\"\"\n",
"    cm = np.asarray(cm)\n",
"    if acc is None:\n",
"        total = cm.sum()\n",
"        # Correct predictions sit on the diagonal; guard against an empty matrix.\n",
"        acc = np.trace(cm) / total if total else float('nan')\n",
"    n_classes = cm.shape[0]\n",
"    pred_labels = [f'pred_{i}' for i in range(1, n_classes + 1)]\n",
"    # Rows are reversed below, so the true-class labels run from last to first.\n",
"    true_labels = [f'true_{i}' for i in range(n_classes, 0, -1)]\n",
"    cm = pd.DataFrame(cm[::-1], columns=pred_labels, index=true_labels)\n",
"    fig = ff.create_annotated_heatmap(z = cm.values, x = list(cm.columns), y = list(cm.index), colorscale = 'ice', showscale = True, reversescale = True)\n",
"    fig.update_layout(width=500, height=500, title=f'Confusion Matrix - Accuracy: {acc:.4f}', font_size=16)\n",
"    fig.show()\n",
"\n",
"plot_confusion_matrix(cm)"
]
},
{
"cell_type": "code",
"execution_count": 449,
"id": "a1ffeb65",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" pred_1 1.00 1.00 1.00 76\n",
" pred_2 1.00 1.00 1.00 89\n",
" pred_3 1.00 1.00 1.00 85\n",
"\n",
" accuracy 1.00 250\n",
" macro avg 1.00 1.00 1.00 250\n",
"weighted avg 1.00 1.00 1.00 250\n",
"\n"
]
}
],
"source": [
"from sklearn.metrics import classification_report\n",
"\n",
"# Per-class precision / recall / F1 on the held-out test split.\n",
"class_names = ['pred_1', 'pred_2', 'pred_3']\n",
"report_text = classification_report(y_test, y_pred, target_names=class_names)\n",
"print(report_text)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}